def it_works(darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str, files_content: dict):
    """fetch_remote_files() hits the items endpoint once and lazily yields parsed items."""
    dataset = RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_name,
        slug=dataset_slug,
        dataset_id=1,
    )
    endpoint = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500"
    responses.add(responses.POST, endpoint, json=files_content, status=200)

    result = dataset.fetch_remote_files()

    # The method must be lazy: a generator, not a materialized list.
    assert isinstance(result, types.GeneratorType)
    first, second = list(result)
    assert responses.assert_call_count(endpoint, 1) is True
    assert first.id == 386074
    assert second.id == 386073
def it_gets_latest_release_when_not_given_one(system_mock: MagicMock, remote_dataset: RemoteDataset):
    """pull() without an explicit release must resolve the latest one via get_release."""
    latest_release = Release(
        "dataset-slug",
        "team-slug",
        "0.1.0",
        "release-name",
        "http://darwin-fake-url.com",
        datetime.now(),
        None,
        None,
        True,
        True,
        "json",
    )

    def stub_download_zip(self, path):
        # Copy a local fixture archive instead of downloading anything.
        fixture: Path = Path("tests/dataset.zip")
        shutil.copy(fixture, path)
        return path

    with patch.object(RemoteDataset, "get_release", return_value=latest_release) as get_release_stub, \
            patch.object(Release, "download_zip", new=stub_download_zip):
        remote_dataset.pull()
        get_release_stub.assert_called_once()
def it_does_not_create_symlink_on_windows(mocker: MagicMock, remote_dataset: RemoteDataset):
    """On Windows pull() must not leave a 'latest' symlink in the releases directory."""
    release_stub = Release(
        "dataset-slug",
        "team-slug",
        "0.1.0",
        "release-name",
        "http://darwin-fake-url.com",
        datetime.now(),
        None,
        None,
        True,
        True,
        "json",
    )

    def stub_download_zip(self, path):
        # Copy a local fixture archive instead of downloading anything.
        fixture: Path = Path("tests/dataset.zip")
        shutil.copy(fixture, path)
        return path

    latest: Path = remote_dataset.local_releases_path / "latest"

    with patch.object(RemoteDataset, "get_release", return_value=release_stub), \
            patch.object(Release, "download_zip", new=stub_download_zip):
        remote_dataset.pull()
        assert not latest.is_symlink()
def it_continues_if_symlink_creation_fails(system_mock: MagicMock, remote_dataset: RemoteDataset):
    """pull() must survive an OSError from symlink creation and simply skip the link."""
    release_stub = Release(
        "dataset-slug",
        "team-slug",
        "0.1.0",
        "release-name",
        "http://darwin-fake-url.com",
        datetime.now(),
        None,
        None,
        True,
        True,
        "json",
    )

    def stub_download_zip(self, path):
        # Copy a local fixture archive instead of downloading anything.
        fixture: Path = Path("tests/dataset.zip")
        shutil.copy(fixture, path)
        return path

    latest: Path = remote_dataset.local_releases_path / "latest"

    with patch.object(Path, "symlink_to") as symlink_to_mock, \
            patch.object(RemoteDataset, "get_release", return_value=release_stub), \
            patch.object(Release, "download_zip", new=stub_download_zip):
        # Simulate the platform refusing to create the symlink.
        symlink_to_mock.side_effect = OSError()
        remote_dataset.pull()
        assert not latest.is_symlink()
def calls_client_put(remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str):
    """reset() must delegate to Client.reset_item with a dataset_item_ids filter."""
    with patch.object(Client, "reset_item", return_value={}) as reset_item_mock:
        remote_dataset.reset([dataset_item])
        reset_item_mock.assert_called_once_with(
            dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}
        )
def calls_client_delete(remote_dataset: RemoteDataset, dataset_item: DatasetItem, team_slug: str, dataset_slug: str):
    """delete_items() must delegate to Client.delete_item with a dataset_item_ids filter.

    Fix: assert against the dataset_slug/team_slug fixtures instead of the
    hard-coded literals "test-dataset"/"v7" — the fixtures were accepted but
    unused, so a fixture change would silently desynchronize this assertion.
    This also makes the test consistent with the sibling calls_client_put.
    """
    with patch.object(Client, "delete_item", return_value={}) as delete_item_mock:
        remote_dataset.delete_items([dataset_item])
        delete_item_mock.assert_called_once_with(
            dataset_slug, team_slug, {"filter": {"dataset_item_ids": [1]}}
        )
def assert_upload_mocks_are_correctly_called(remote_dataset: RemoteDataset, *args):
    """push() helper: _request_upload runs once and upload gets the default flags."""
    with patch.object(UploadHandler, "_request_upload", return_value=([], [])) as request_upload_mock, \
            patch.object(UploadHandler, "upload") as upload_mock:
        remote_dataset.push(*args)
        request_upload_mock.assert_called_once()
        upload_mock.assert_called_once_with(
            multi_threaded=True, progress_callback=None, file_upload_callback=None
        )
def create_dataset(self, name: str, team_slug: Optional[str] = None) -> RemoteDataset:
    """
    Create a remote dataset.

    Parameters
    ----------
    name : str
        Name of the dataset to create.
    team_slug: Optional[str]
        Team slug of the team the dataset will belong to. Defaults to None,
        in which case the client's default team is used.

    Returns
    -------
    RemoteDataset
        The created dataset.
    """
    payload: Dict[str, Any] = cast(
        Dict[str, Any], self._post("/datasets", {"name": name}, team_slug=team_slug)
    )
    return RemoteDataset(
        client=self,
        team=team_slug or self.default_team,
        name=payload["name"],
        slug=payload["slug"],
        dataset_id=payload["id"],
        item_count=payload["num_images"],
        progress=0,
    )
def create_dataset(self, name: str, team: Optional[str] = None) -> RemoteDataset:
    """Create a remote dataset

    Parameters
    ----------
    name : str
        Name of the dataset to create
    team : Optional[str]
        Slug of the team the dataset will belong to; falls back to the
        client's default team when None

    Returns
    -------
    RemoteDataset
        The created dataset
    """
    payload = self.post(
        "/datasets",
        {"name": name},
        team=team,
        error_handlers=[name_taken, validation_error],
    )
    return RemoteDataset(
        client=self,
        team=team or self.default_team,
        name=payload["name"],
        slug=payload["slug"],
        dataset_id=payload["id"],
        image_count=payload["num_images"],
        progress=payload["progress"],
    )
def remote_dataset(dataset_slug: str, local_config_file: Config):
    """Fixture: a RemoteDataset on the 'v7' team with a fixed dataset id of 1."""
    api_client = Client(local_config_file)
    return RemoteDataset(
        client=api_client,
        team="v7",
        name="TEST_DATASET",
        slug=dataset_slug,
        dataset_id=1,
    )
def list_remote_datasets(self, team_slug: Optional[str] = None) -> Iterator[RemoteDataset]:
    """
    Returns a list of all available datasets with the team currently authenticated against.

    Parameters
    ----------
    team_slug: Optional[str]
        The team slug of the dataset. Defaults to None.

    Returns
    -------
    Iterator[RemoteDataset]
        List of all remote datasets
    """
    payloads: List[Dict[str, Any]] = cast(
        List[Dict[str, Any]], self._get("/datasets/", team_slug=team_slug)
    )
    for payload in payloads:
        yield RemoteDataset(
            client=self,
            team=team_slug or self.default_team,
            name=payload["name"],
            slug=payload["slug"],
            dataset_id=payload["id"],
            # Item count covers both images and videos.
            item_count=payload["num_images"] + payload["num_videos"],
            progress=payload["progress"],
        )
def remote_dataset(darwin_client: Client, dataset_name: str, dataset_slug: str, team_slug: str):
    """Fixture: a RemoteDataset assembled from the client/team/name/slug fixtures."""
    return RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_name,
        slug=dataset_slug,
        dataset_id=1,
    )
def it_raises_if_release_format_is_not_json(system_mock: MagicMock, remote_dataset: RemoteDataset):
    """pull() must reject a release whose export format is anything but json."""
    xml_release = Release(
        remote_dataset.slug,
        remote_dataset.team,
        "0.1.0",
        "release-name",
        "http://darwin-fake-url.com",
        datetime.now(),
        None,
        None,
        True,
        True,
        "xml",
    )
    with pytest.raises(UnsupportedExportFormat):
        remote_dataset.pull(release=xml_release)
def get_remote_dataset(
        self, dataset_identifier: Union[str, DatasetIdentifier]) -> RemoteDataset:
    """Get a remote dataset based on the parameter passed.

    Parameters
    ----------
    dataset_identifier : Union[str, DatasetIdentifier]
        Identifier of the dataset to return; a string is parsed into a
        DatasetIdentifier first.

    Returns
    -------
    RemoteDataset
        Initialized dataset

    Raises
    ------
    NotFound
        If no dataset matching the identifier exists.
    """
    if isinstance(dataset_identifier, str):
        dataset_identifier = DatasetIdentifier.parse(dataset_identifier)
    # Fall back to the client's default team when the identifier has none.
    if not dataset_identifier.team_slug:
        dataset_identifier.team_slug = self.default_team
    try:
        matching_datasets = [
            dataset for dataset in self.list_remote_datasets(
                team=dataset_identifier.team_slug)
            if dataset.slug == dataset_identifier.dataset_slug
        ]
    except Unauthorized:
        # There is a chance that we tried to access an open dataset:
        # fetch it directly by team/slug instead of listing.
        dataset = self.get(
            f"{dataset_identifier.team_slug}/{dataset_identifier.dataset_slug}"
        )
        # If there isn't a record of this team, create one.
        # NOTE(review): this registers the team with an empty api_key —
        # presumably sufficient for open datasets; confirm against Client.
        if not self.config.get_team(dataset_identifier.team_slug,
                                    raise_on_invalid_team=False):
            datasets_dir = Path.home() / ".darwin" / "datasets"
            self.config.set_team(team=dataset_identifier.team_slug,
                                 api_key="",
                                 datasets_dir=str(datasets_dir))
        return RemoteDataset(
            name=dataset["name"],
            slug=dataset["slug"],
            team=dataset_identifier.team_slug,
            dataset_id=dataset["id"],
            image_count=dataset["num_images"],
            progress=0,
            client=self,
        )
    if not matching_datasets:
        raise NotFound(dataset_identifier)
    return matching_datasets[0]
def list_remote_datasets(self, team: Optional[str] = None) -> Iterator[RemoteDataset]:
    """Returns a list of all available datasets with the team currently authenticated against

    Returns
    -------
    list[RemoteDataset]
        List of all remote datasets
    """
    for payload in self.get("/datasets/", team=team):
        yield RemoteDataset(
            client=self,
            team=team or self.default_team,
            name=payload["name"],
            slug=payload["slug"],
            dataset_id=payload["id"],
            image_count=payload["num_images"],
            progress=payload["progress"],
        )
def get_remote_dataset(
        self, dataset_identifier: Union[str, DatasetIdentifier]) -> RemoteDataset:
    """
    Get a remote dataset based on its identifier.

    Parameters
    ----------
    dataset_identifier : Union[str, DatasetIdentifier]
        Identifier of the dataset. Can be the string version or a DatasetIdentifier object.

    Returns
    -------
    RemoteDataset
        Initialized dataset.

    Raises
    -------
    NotFound
        If no dataset with the given identifier was found.
    """
    parsed_dataset_identifier: DatasetIdentifier = DatasetIdentifier.parse(
        dataset_identifier)
    # Fall back to the client's default team when the identifier has none.
    if not parsed_dataset_identifier.team_slug:
        parsed_dataset_identifier.team_slug = self.default_team
    try:
        matching_datasets: List[RemoteDataset] = [
            dataset for dataset in self.list_remote_datasets(
                team_slug=parsed_dataset_identifier.team_slug)
            if dataset.slug == parsed_dataset_identifier.dataset_slug
        ]
    except Unauthorized:
        # There is a chance that we tried to access an open dataset:
        # fetch it directly by team/slug instead of listing.
        dataset: Dict[str, Any] = cast(
            Dict[str, Any],
            self._get(
                f"{parsed_dataset_identifier.team_slug}/{parsed_dataset_identifier.dataset_slug}"
            ),
        )
        # If there isn't a record of this team, create one.
        # NOTE(review): this registers the team with an empty api_key —
        # presumably sufficient for open datasets; confirm against Client.
        if not self.config.get_team(parsed_dataset_identifier.team_slug,
                                    raise_on_invalid_team=False):
            datasets_dir: Path = Path.home() / ".darwin" / "datasets"
            self.config.set_team(team=parsed_dataset_identifier.team_slug,
                                 api_key="",
                                 datasets_dir=str(datasets_dir))
        return RemoteDataset(
            name=dataset["name"],
            slug=dataset["slug"],
            team=parsed_dataset_identifier.team_slug,
            dataset_id=dataset["id"],
            # Item count covers both images and videos.
            item_count=dataset["num_images"] + dataset["num_videos"],
            progress=0,
            client=self,
        )
    if not matching_datasets:
        raise NotFound(str(parsed_dataset_identifier))
    return matching_datasets[0]
def dataset(darwin_client: Client, team_slug: str, dataset_slug: str) -> RemoteDataset:
    """Fixture: a RemoteDataset whose name equals its slug, with dataset id 1."""
    return RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_slug,
        slug=dataset_slug,
        dataset_id=1,
    )
def it_works_on_videos(
    darwin_client: Client,
    darwin_datasets_path: Path,
    dataset_name: str,
    dataset_slug: str,
    release_name: str,
    team_slug: str,
):
    """split_video_annotations() writes one annotation file per video frame."""
    dataset = RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_name,
        slug=dataset_slug,
        dataset_id=1,
    )
    dataset.split_video_annotations()

    video_path = (darwin_datasets_path / team_slug / dataset_slug / "releases"
                  / release_name / "annotations" / "test_video")
    assert video_path.exists()

    def polygon_annotation(points):
        # Shorthand for a single test_class polygon annotation.
        return [{"name": "test_class", "polygon": {"path": points}}]

    def image_payload(filename, url):
        # All frames share the same 1920x1080 geometry.
        return {"filename": filename, "height": 1080, "url": url, "width": 1920}

    expected_frames = {
        "0000000.json": {
            "annotations": polygon_annotation(
                [{"x": 0, "y": 0}, {"x": 1, "y": 1}, {"x": 1, "y": 0}]),
            "image": image_payload("test_video/0000000.png", "frame_1.jpg"),
        },
        "0000001.json": {
            "annotations": [],
            "image": image_payload("test_video/0000001.png", "frame_2.jpg"),
        },
        "0000002.json": {
            "annotations": polygon_annotation(
                [{"x": 5, "y": 5}, {"x": 6, "y": 6}, {"x": 6, "y": 5}]),
            "image": image_payload("test_video/0000002.png", "frame_3.jpg"),
        },
    }

    for file_name, expected in expected_frames.items():
        frame_file = video_path / file_name
        assert frame_file.exists()
        with frame_file.open() as f:
            assert json.load(f) == expected

    # Exactly three frames — no fourth file may be produced.
    assert not (video_path / "0000003.json").exists()
def raises_if_files_are_not_provided(remote_dataset: RemoteDataset):
    """push() must reject a missing (None) file list with ValueError."""
    with pytest.raises(ValueError):
        remote_dataset.push(None)
def raises_with_unsupported_files(remote_dataset: RemoteDataset):
    """push() must reject file types it cannot upload."""
    with pytest.raises(UnsupportedFileType):
        remote_dataset.push(["test.txt"])
def raises_if_both_as_frames_and_local_files_are_given(remote_dataset: RemoteDataset):
    """push() must reject as_frames=True when LocalFile objects are supplied."""
    with pytest.raises(ValueError):
        remote_dataset.push([LocalFile("test.jpg")], as_frames=True)