Example #1
0
    def it_works(darwin_client: Client, dataset_name: str, dataset_slug: str,
                 team_slug: str, files_content: dict):
        """
        Checks that ``fetch_remote_files`` returns a generator producing the stubbed items.

        A POST to the items endpoint is registered with ``responses`` and answers with
        ``files_content``. After draining the generator, the endpoint must have been called
        exactly once and the two resulting items must carry ids 386074 and 386073.
        """
        dataset = RemoteDataset(
            client=darwin_client,
            team=team_slug,
            name=dataset_name,
            slug=dataset_slug,
            dataset_id=1,
        )

        items_url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500"
        responses.add(responses.POST, items_url, json=files_content, status=200)

        fetched = dataset.fetch_remote_files()
        assert isinstance(fetched, types.GeneratorType)

        # Drain the generator before checking how often the endpoint was called.
        first, second = tuple(fetched)

        assert responses.assert_call_count(items_url, 1) is True

        assert first.id == 386074
        assert second.id == 386073
Example #2
0
    def it_gets_latest_release_when_not_given_one(
            system_mock: MagicMock, remote_dataset: RemoteDataset):
        """
        Checks that ``pull()`` without an explicit release calls ``get_release`` exactly once.

        ``RemoteDataset.get_release`` is stubbed to return a canned ``Release`` and
        ``Release.download_zip`` is replaced by a function that copies ``tests/dataset.zip``
        to the requested path.
        """
        canned_release = Release(
            "dataset-slug",
            "team-slug",
            "0.1.0",
            "release-name",
            "http://darwin-fake-url.com",
            datetime.now(),
            None,
            None,
            True,
            True,
            "json",
        )

        def fake_download_zip(self, path):
            # Stand-in for Release.download_zip: copy the local archive to ``path``.
            archive: Path = Path("tests/dataset.zip")
            shutil.copy(archive, path)
            return path

        get_release_patch = patch.object(RemoteDataset, "get_release", return_value=canned_release)
        download_zip_patch = patch.object(Release, "download_zip", new=fake_download_zip)

        with get_release_patch as get_release_stub, download_zip_patch:
            remote_dataset.pull()
            get_release_stub.assert_called_once()
Example #3
0
    def it_does_not_create_symlink_on_windows(mocker: MagicMock,
                                              remote_dataset: RemoteDataset):
        """
        Checks that after ``pull()`` the ``latest`` entry in the local releases path is not a symlink.

        NOTE(review): the Windows platform is presumably simulated by the ``mocker`` fixture,
        which is not visible in this snippet -- confirm against the fixture definition.
        """
        canned_release = Release(
            "dataset-slug",
            "team-slug",
            "0.1.0",
            "release-name",
            "http://darwin-fake-url.com",
            datetime.now(),
            None,
            None,
            True,
            True,
            "json",
        )

        def fake_download_zip(self, path):
            # Stand-in for Release.download_zip: copy the local archive to ``path``.
            archive: Path = Path("tests/dataset.zip")
            shutil.copy(archive, path)
            return path

        latest: Path = remote_dataset.local_releases_path / "latest"

        get_release_patch = patch.object(RemoteDataset, "get_release", return_value=canned_release)
        download_zip_patch = patch.object(Release, "download_zip", new=fake_download_zip)

        with get_release_patch, download_zip_patch:
            remote_dataset.pull()
            assert not latest.is_symlink()
Example #4
0
    def it_continues_if_symlink_creation_fails(system_mock: MagicMock,
                                               remote_dataset: RemoteDataset):
        """
        Checks that ``pull()`` completes even when ``Path.symlink_to`` raises ``OSError``.

        ``Path.symlink_to`` is patched to raise, ``RemoteDataset.get_release`` returns a canned
        ``Release`` and ``Release.download_zip`` copies ``tests/dataset.zip`` to the requested
        path. Afterwards ``latest`` in the local releases path must not be a symlink.
        """
        canned_release = Release(
            "dataset-slug",
            "team-slug",
            "0.1.0",
            "release-name",
            "http://darwin-fake-url.com",
            datetime.now(),
            None,
            None,
            True,
            True,
            "json",
        )

        def fake_download_zip(self, path):
            # Stand-in for Release.download_zip: copy the local archive to ``path``.
            archive: Path = Path("tests/dataset.zip")
            shutil.copy(archive, path)
            return path

        latest: Path = remote_dataset.local_releases_path / "latest"

        # Every attempt to create a symlink fails with OSError while this patch is active.
        symlink_patch = patch.object(Path, "symlink_to", side_effect=OSError())
        get_release_patch = patch.object(RemoteDataset, "get_release", return_value=canned_release)
        download_zip_patch = patch.object(Release, "download_zip", new=fake_download_zip)

        with symlink_patch, get_release_patch, download_zip_patch:
            remote_dataset.pull()
            assert not latest.is_symlink()
Example #5
0
 def calls_client_put(remote_dataset: RemoteDataset,
                      dataset_item: DatasetItem, team_slug: str,
                      dataset_slug: str):
     """
     Checks that ``reset`` forwards to ``Client.reset_item`` once, with the dataset slug,
     the team slug and a filter on the item id.
     """
     expected_payload = {"filter": {"dataset_item_ids": [1]}}

     with patch.object(Client, "reset_item", return_value={}) as reset_item_stub:
         remote_dataset.reset([dataset_item])

     # The stub keeps its call record after the patch has been undone.
     reset_item_stub.assert_called_once_with(dataset_slug, team_slug, expected_payload)
Example #6
0
 def calls_client_delete(remote_dataset: RemoteDataset,
                         dataset_item: DatasetItem, team_slug: str,
                         dataset_slug: str):
     """
     Checks that ``delete_items`` forwards to ``Client.delete_item`` once, with a filter on the item id.

     NOTE(review): the expected slugs are hard-coded ("test-dataset", "v7") rather than taken
     from the ``dataset_slug`` / ``team_slug`` arguments -- presumably they match; confirm.
     """
     expected_payload = {"filter": {"dataset_item_ids": [1]}}

     with patch.object(Client, "delete_item", return_value={}) as delete_item_stub:
         remote_dataset.delete_items([dataset_item])

     # The stub keeps its call record after the patch has been undone.
     delete_item_stub.assert_called_once_with("test-dataset", "v7", expected_payload)
Example #7
0
def assert_upload_mocks_are_correctly_called(remote_dataset: RemoteDataset,
                                             *args):
    """
    Call ``remote_dataset.push(*args)`` with ``UploadHandler`` stubbed and verify the stubs.

    ``UploadHandler._request_upload`` is replaced by a mock returning ``([], [])`` and
    ``UploadHandler.upload`` by a plain mock. After the push, ``_request_upload`` must have
    been called exactly once and ``upload`` exactly once with ``multi_threaded=True`` and
    both callbacks set to ``None``.

    Parameters
    ----------
    remote_dataset : RemoteDataset
        Dataset whose ``push`` method is exercised.
    *args
        Positional arguments forwarded unchanged to ``push``.
    """
    request_upload_patch = patch.object(UploadHandler, "_request_upload", return_value=([], []))
    upload_patch = patch.object(UploadHandler, "upload")

    with request_upload_patch as request_upload_mock, upload_patch as upload_mock:
        remote_dataset.push(*args)

        request_upload_mock.assert_called_once()
        upload_mock.assert_called_once_with(
            multi_threaded=True,
            progress_callback=None,
            file_upload_callback=None,
        )
Example #8
0
    def create_dataset(self,
                       name: str,
                       team_slug: Optional[str] = None) -> RemoteDataset:
        """
        Create a dataset remotely and return a ``RemoteDataset`` describing it.

        The dataset is created by posting ``{"name": name}`` to ``/datasets``; the returned
        object is built from the ``name``, ``slug``, ``id`` and ``num_images`` fields of the
        response, with ``progress`` set to 0.

        Parameters
        ----------
        name : str
            Name of the dataset to create.
        team_slug: Optional[str]
            Slug of the team the dataset will belong to. When falsy, ``self.default_team``
            is used for the returned dataset. Defaults to None.

        Returns
        -------
        RemoteDataset
            The created dataset.
        """
        response = self._post("/datasets", {"name": name}, team_slug=team_slug)
        created: Dict[str, Any] = cast(Dict[str, Any], response)

        return RemoteDataset(
            name=created["name"],
            team=team_slug or self.default_team,
            slug=created["slug"],
            dataset_id=created["id"],
            item_count=created["num_images"],
            progress=0,
            client=self,
        )
Example #9
0
    def create_dataset(self,
                       name: str,
                       team: Optional[str] = None) -> RemoteDataset:
        """Create a dataset remotely and return a ``RemoteDataset`` describing it

        The dataset is created by posting ``{"name": name}`` to ``/datasets`` with the
        ``name_taken`` and ``validation_error`` error handlers installed.

        Parameters
        ----------
        name : str
            Name of the dataset to create
        team : Optional[str]
            Team passed along with the request; when falsy, ``self.default_team`` is used
            for the returned dataset

        Returns
        -------
        RemoteDataset
            The created dataset
        """
        handlers = [name_taken, validation_error]
        created = self.post("/datasets", {"name": name}, team=team, error_handlers=handlers)

        return RemoteDataset(
            name=created["name"],
            team=team or self.default_team,
            slug=created["slug"],
            dataset_id=created["id"],
            image_count=created["num_images"],
            progress=created["progress"],
            client=self,
        )
Example #10
0
def remote_dataset(dataset_slug: str, local_config_file: Config):
    """
    Build a ``RemoteDataset`` with id 1, team "v7" and name "TEST_DATASET".

    The dataset is backed by a ``Client`` created from ``local_config_file`` and uses
    ``dataset_slug`` as its slug.
    """
    return RemoteDataset(
        client=Client(local_config_file),
        team="v7",
        name="TEST_DATASET",
        slug=dataset_slug,
        dataset_id=1,
    )
Example #11
0
    def list_remote_datasets(self,
                             team_slug: Optional[str] = None
                             ) -> Iterator[RemoteDataset]:
        """
        Lazily yields one ``RemoteDataset`` per entry returned by ``/datasets/``.

        Each dataset's ``item_count`` is the sum of the entry's ``num_images`` and
        ``num_videos`` fields.

        Parameters
        ----------
        team_slug: Optional[str]
            The team slug used for the request. When falsy, ``self.default_team`` is used
            as the team of the yielded datasets. Defaults to None.

        Returns
        -------
        Iterator[RemoteDataset]
            Iterator over the remote datasets.
        """
        raw_entries = self._get("/datasets/", team_slug=team_slug)
        entries: List[Dict[str, Any]] = cast(List[Dict[str, Any]], raw_entries)

        for entry in entries:
            yield RemoteDataset(
                name=entry["name"],
                slug=entry["slug"],
                team=team_slug or self.default_team,
                dataset_id=entry["id"],
                item_count=entry["num_images"] + entry["num_videos"],
                progress=entry["progress"],
                client=self,
            )
Example #12
0
def remote_dataset(darwin_client: Client, dataset_name: str, dataset_slug: str,
                   team_slug: str):
    """Build a ``RemoteDataset`` with id 1 from the given client, team slug, name and slug."""
    return RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_name,
        slug=dataset_slug,
        dataset_id=1,
    )
Example #13
0
    def it_raises_if_release_format_is_not_json(system_mock: MagicMock,
                                                remote_dataset: RemoteDataset):
        """
        Checks that ``pull`` raises ``UnsupportedExportFormat`` for a release built with "xml".

        NOTE(review): the last ``Release`` constructor argument is presumably the export
        format -- confirm against the ``Release`` definition.
        """
        xml_release = Release(
            remote_dataset.slug,
            remote_dataset.team,
            "0.1.0",
            "release-name",
            "http://darwin-fake-url.com",
            datetime.now(),
            None,
            None,
            True,
            True,
            "xml",
        )

        with pytest.raises(UnsupportedExportFormat):
            remote_dataset.pull(release=xml_release)
Example #14
0
    def get_remote_dataset(
            self,
            dataset_identifier: Union[str,
                                      DatasetIdentifier]) -> RemoteDataset:
        """Get the remote dataset matching the given identifier.

        The team's datasets are listed and the first one whose slug matches is returned. If
        listing is not authorized, the dataset is fetched directly by ``team/dataset`` path
        instead, and the team is registered in the local config when it is not known yet.

        Parameters
        ----------
        dataset_identifier : Union[str, DatasetIdentifier]
            Identifier of the dataset. A string is parsed with ``DatasetIdentifier.parse``.
            If the identifier carries no team slug, it is set to ``self.default_team``.

        Returns
        -------
        RemoteDataset
            Initialized dataset

        Raises
        ------
        NotFound
            If listing succeeded but no dataset has the requested slug
        """
        if isinstance(dataset_identifier, str):
            dataset_identifier = DatasetIdentifier.parse(dataset_identifier)
        if not dataset_identifier.team_slug:
            dataset_identifier.team_slug = self.default_team

        try:
            candidates = [
                candidate for candidate in self.list_remote_datasets(
                    team=dataset_identifier.team_slug)
                if candidate.slug == dataset_identifier.dataset_slug
            ]
        except Unauthorized:
            # There is a chance that we tried to access an open dataset
            payload = self.get(
                f"{dataset_identifier.team_slug}/{dataset_identifier.dataset_slug}"
            )

            # If there isn't a record of this team, create one.
            known_team = self.config.get_team(dataset_identifier.team_slug,
                                              raise_on_invalid_team=False)
            if not known_team:
                self.config.set_team(
                    team=dataset_identifier.team_slug,
                    api_key="",
                    datasets_dir=str(Path.home() / ".darwin" / "datasets"),
                )

            return RemoteDataset(
                name=payload["name"],
                slug=payload["slug"],
                team=dataset_identifier.team_slug,
                dataset_id=payload["id"],
                image_count=payload["num_images"],
                progress=0,
                client=self,
            )

        if candidates:
            return candidates[0]
        raise NotFound(dataset_identifier)
Example #15
0
    def list_remote_datasets(self,
                             team: Optional[str] = None
                             ) -> Iterator[RemoteDataset]:
        """Lazily yields one ``RemoteDataset`` per entry returned by ``self.get("/datasets/")``

        Parameters
        ----------
        team : Optional[str]
            Team passed along with the request; when falsy, ``self.default_team`` is used
            as the team of the yielded datasets

        Returns
        -------
        Iterator[RemoteDataset]
            Iterator over the remote datasets
        """
        entries = self.get("/datasets/", team=team)

        for entry in entries:
            yield RemoteDataset(
                name=entry["name"],
                slug=entry["slug"],
                team=team or self.default_team,
                dataset_id=entry["id"],
                image_count=entry["num_images"],
                progress=entry["progress"],
                client=self,
            )
Example #16
0
    def get_remote_dataset(
            self,
            dataset_identifier: Union[str,
                                      DatasetIdentifier]) -> RemoteDataset:
        """
        Look up a remote dataset by its identifier.

        The team's datasets are listed and the first one whose slug matches is returned. If
        listing is not authorized, the dataset is fetched directly by ``team/dataset`` path
        instead, and the team is registered in the local config when it is not known yet.

        Parameters
        ----------
        dataset_identifier : Union[str, DatasetIdentifier]
            Identifier of the dataset, passed through ``DatasetIdentifier.parse``. If the
            parsed identifier carries no team slug, ``self.default_team`` is used.

        Returns
        -------
        RemoteDataset
            Initialized dataset.

        Raises
        -------
        NotFound
            If listing succeeded but no dataset has the requested slug.
        """
        parsed_dataset_identifier: DatasetIdentifier = DatasetIdentifier.parse(
            dataset_identifier)

        if not parsed_dataset_identifier.team_slug:
            parsed_dataset_identifier.team_slug = self.default_team

        try:
            candidates: List[RemoteDataset] = [
                candidate for candidate in self.list_remote_datasets(
                    team_slug=parsed_dataset_identifier.team_slug)
                if candidate.slug == parsed_dataset_identifier.dataset_slug
            ]
        except Unauthorized:
            # There is a chance that we tried to access an open dataset
            raw_payload = self._get(
                f"{parsed_dataset_identifier.team_slug}/{parsed_dataset_identifier.dataset_slug}"
            )
            payload: Dict[str, Any] = cast(Dict[str, Any], raw_payload)

            # If there isn't a record of this team, create one.
            known_team = self.config.get_team(parsed_dataset_identifier.team_slug,
                                              raise_on_invalid_team=False)
            if not known_team:
                default_datasets_dir: Path = Path.home() / ".darwin" / "datasets"
                self.config.set_team(
                    team=parsed_dataset_identifier.team_slug,
                    api_key="",
                    datasets_dir=str(default_datasets_dir),
                )

            return RemoteDataset(
                name=payload["name"],
                slug=payload["slug"],
                team=parsed_dataset_identifier.team_slug,
                dataset_id=payload["id"],
                item_count=payload["num_images"] + payload["num_videos"],
                progress=0,
                client=self,
            )

        if candidates:
            return candidates[0]
        raise NotFound(str(parsed_dataset_identifier))
Example #17
0
def dataset(darwin_client: Client, team_slug: str, dataset_slug: str) -> RemoteDataset:
    """Build a ``RemoteDataset`` with id 1 whose name and slug are both ``dataset_slug``."""
    return RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_slug,
        slug=dataset_slug,
        dataset_id=1,
    )
Example #18
0
    def it_works_on_videos(
        darwin_client: Client,
        darwin_datasets_path: Path,
        dataset_name: str,
        dataset_slug: str,
        release_name: str,
        team_slug: str,
    ):
        """
        Checks that ``split_video_annotations`` writes one annotation file per video frame.

        After the call, the release's ``annotations/test_video`` directory must contain exactly
        the files ``0000000.json`` to ``0000002.json`` (no ``0000003.json``), and each file
        must hold the expected annotations and image metadata for its frame.
        """

        def read_json(path: Path):
            # Parse the JSON document stored at ``path``.
            with path.open() as stream:
                return json.load(stream)

        dataset = RemoteDataset(
            client=darwin_client,
            team=team_slug,
            name=dataset_name,
            slug=dataset_slug,
            dataset_id=1,
        )

        dataset.split_video_annotations()

        release_dir = darwin_datasets_path / team_slug / dataset_slug / "releases" / release_name
        video_path = release_dir / "annotations" / "test_video"
        assert video_path.exists()

        first_frame = video_path / "0000000.json"
        second_frame = video_path / "0000001.json"
        third_frame = video_path / "0000002.json"

        assert first_frame.exists()
        assert second_frame.exists()
        assert third_frame.exists()
        assert not (video_path / "0000003.json").exists()

        expected_first = {
            "annotations": [
                {
                    "name": "test_class",
                    "polygon": {"path": [{"x": 0, "y": 0}, {"x": 1, "y": 1}, {"x": 1, "y": 0}]},
                }
            ],
            "image": {
                "filename": "test_video/0000000.png",
                "height": 1080,
                "url": "frame_1.jpg",
                "width": 1920,
            },
        }
        assert read_json(first_frame) == expected_first

        # The second frame is expected to carry no annotations at all.
        expected_second = {
            "annotations": [],
            "image": {
                "filename": "test_video/0000001.png",
                "height": 1080,
                "url": "frame_2.jpg",
                "width": 1920,
            },
        }
        assert read_json(second_frame) == expected_second

        expected_third = {
            "annotations": [
                {
                    "name": "test_class",
                    "polygon": {"path": [{"x": 5, "y": 5}, {"x": 6, "y": 6}, {"x": 6, "y": 5}]},
                }
            ],
            "image": {
                "filename": "test_video/0000002.png",
                "height": 1080,
                "url": "frame_3.jpg",
                "width": 1920,
            },
        }
        assert read_json(third_frame) == expected_third
Example #19
0
 def raises_if_files_are_not_provided(remote_dataset: RemoteDataset):
     """Checks that ``push`` raises ``ValueError`` when given ``None`` instead of files."""
     missing_files = None

     with pytest.raises(ValueError):
         remote_dataset.push(missing_files)
Example #20
0
 def raises_with_unsupported_files(remote_dataset: RemoteDataset):
     """Checks that ``push`` raises ``UnsupportedFileType`` for the path "test.txt"."""
     unsupported_files = ["test.txt"]

     with pytest.raises(UnsupportedFileType):
         remote_dataset.push(unsupported_files)
Example #21
0
 def raises_if_both_as_frames_and_local_files_are_given(
         remote_dataset: RemoteDataset):
     """Checks that ``push`` raises ``ValueError`` when ``LocalFile`` objects are combined with ``as_frames=True``."""
     with pytest.raises(ValueError):
         # The LocalFile is built inside the block, so a ValueError from its constructor also counts.
         remote_dataset.push(
             [LocalFile("test.jpg")],
             as_frames=True,
         )