Example #1
0
    def it_works(darwin_client: Client, dataset_name: str, dataset_slug: str,
                 team_slug: str, files_content: dict):
        """Check that ``fetch_remote_files`` returns a generator over the mocked items.

        The items endpoint is stubbed to answer ``files_content``; the test then
        expects exactly two items, a single call to the endpoint, and the item
        ids ``386074`` and ``386073`` in that order.
        """
        dataset = RemoteDataset(
            client=darwin_client,
            team=team_slug,
            name=dataset_name,
            slug=dataset_slug,
            dataset_id=1,
        )
        items_url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500"
        responses.add(responses.POST, items_url, json=files_content, status=200)

        fetched = dataset.fetch_remote_files()
        assert isinstance(fetched, types.GeneratorType)

        # Unpacking into two names also asserts that exactly two items come back.
        first_item, second_item = list(fetched)

        assert responses.assert_call_count(items_url, 1) is True

        assert first_item.id == 386074
        assert second_item.id == 386073
Example #2
0
def remote_dataset(darwin_client: Client, dataset_name: str, dataset_slug: str,
                   team_slug: str):
    """Build a ``RemoteDataset`` with id 1 from the given client, team, name and slug."""
    dataset_fields = {
        "client": darwin_client,
        "team": team_slug,
        "name": dataset_name,
        "slug": dataset_slug,
        "dataset_id": 1,
    }
    return RemoteDataset(**dataset_fields)
Example #3
0
    def create_dataset(self,
                       name: str,
                       team: Optional[str] = None) -> RemoteDataset:
        """Create a remote dataset

        Posts the requested name to ``/datasets`` and wraps the response in a
        ``RemoteDataset`` bound to this client.

        Parameters
        ----------
        name : str
            Name of the dataset to create
        team : Optional[str]
            Team forwarded to the request. When falsy, ``self.default_team`` is
            used as the team of the returned dataset.

        Returns
        -------
        RemoteDataset
        The created dataset
        """
        payload = {"name": name}
        handlers = [name_taken, validation_error]
        created = self.post("/datasets", payload, team=team, error_handlers=handlers)

        # Fields are gathered in the same order the constructor receives them.
        fields = {
            "name": created["name"],
            "team": team or self.default_team,
            "slug": created["slug"],
            "dataset_id": created["id"],
            "image_count": created["num_images"],
            "progress": created["progress"],
            "client": self,
        }
        return RemoteDataset(**fields)
Example #4
0
def remote_dataset(dataset_slug: str, local_config_file: Config):
    """Build the ``TEST_DATASET`` remote dataset of team ``v7`` using a client made from the local config."""
    return RemoteDataset(
        client=Client(local_config_file),
        team="v7",
        name="TEST_DATASET",
        slug=dataset_slug,
        dataset_id=1,
    )
Example #5
0
    def list_remote_datasets(self,
                             team_slug: Optional[str] = None
                             ) -> Iterator[RemoteDataset]:
        """
        Yields every dataset returned by the ``/datasets/`` endpoint for the given team.

        This is a generator: the request is only issued once iteration starts.

        Parameters
        ----------
        team_slug: Optional[str]
            The team slug of the dataset. Defaults to None, in which case
            ``self.default_team`` is used as the team of each yielded dataset.

        Yields
        ------
        RemoteDataset
            One remote dataset per entry of the response.
        """
        raw_entries = cast(List[Dict[str, Any]],
                           self._get("/datasets/", team_slug=team_slug))

        for entry in raw_entries:
            # The item count is the sum of the image and video counts.
            fields = {
                "name": entry["name"],
                "slug": entry["slug"],
                "team": team_slug or self.default_team,
                "dataset_id": entry["id"],
                "item_count": entry["num_images"] + entry["num_videos"],
                "progress": entry["progress"],
                "client": self,
            }
            yield RemoteDataset(**fields)
Example #6
0
    def create_dataset(self,
                       name: str,
                       team_slug: Optional[str] = None) -> RemoteDataset:
        """
        Create a remote dataset.

        Posts the requested name to ``/datasets`` and wraps the response in a
        ``RemoteDataset`` bound to this client, with its progress set to 0.

        Parameters
        ----------
        name : str
            Name of the dataset to create.
        team_slug: Optional[str]
            Team slug of the team the dataset will belong to. Defaults to None,
            in which case ``self.default_team`` is used for the returned dataset.

        Returns
        -------
        RemoteDataset
            The created dataset.
        """
        payload = {"name": name}
        created = cast(Dict[str, Any],
                       self._post("/datasets", payload, team_slug=team_slug))

        fields = {
            "name": created["name"],
            "team": team_slug or self.default_team,
            "slug": created["slug"],
            "dataset_id": created["id"],
            "item_count": created["num_images"],
            "progress": 0,
            "client": self,
        }
        return RemoteDataset(**fields)
Example #7
0
    def get_remote_dataset(
            self,
            dataset_identifier: Union[str,
                                      DatasetIdentifier]) -> RemoteDataset:
        """Get the remote dataset matching the given identifier.

        The datasets listed for the identifier's team are searched by slug. If
        listing raises ``Unauthorized``, the dataset is fetched directly from
        ``<team_slug>/<dataset_slug>`` instead, and a team entry with an empty
        API key is added to the config when none exists yet.

        Parameters
        ----------
        dataset_identifier : Union[str, DatasetIdentifier]
            Identifier of the dataset. A string is parsed with
            ``DatasetIdentifier.parse``; a missing team slug is filled in with
            ``self.default_team``.

        Returns
        -------
        RemoteDataset
            Initialized dataset

        Raises
        ------
        NotFound
            If the team's datasets could be listed but none has the requested slug.
        """
        identifier = dataset_identifier
        if isinstance(identifier, str):
            identifier = DatasetIdentifier.parse(identifier)
        if not identifier.team_slug:
            identifier.team_slug = self.default_team

        try:
            matches = []
            for candidate in self.list_remote_datasets(team=identifier.team_slug):
                if candidate.slug == identifier.dataset_slug:
                    matches.append(candidate)
        except Unauthorized:
            # There is a chance that we tried to access an open dataset
            open_dataset = self.get(
                f"{identifier.team_slug}/{identifier.dataset_slug}")

            # If there isn't a record of this team, create one.
            known_team = self.config.get_team(identifier.team_slug,
                                              raise_on_invalid_team=False)
            if not known_team:
                default_dir = Path.home() / ".darwin" / "datasets"
                self.config.set_team(team=identifier.team_slug,
                                     api_key="",
                                     datasets_dir=str(default_dir))

            fields = {
                "name": open_dataset["name"],
                "slug": open_dataset["slug"],
                "team": identifier.team_slug,
                "dataset_id": open_dataset["id"],
                "image_count": open_dataset["num_images"],
                "progress": 0,
                "client": self,
            }
            return RemoteDataset(**fields)

        if matches:
            return matches[0]
        raise NotFound(identifier)
Example #8
0
    def list_remote_datasets(self,
                             team: Optional[str] = None
                             ) -> Iterator[RemoteDataset]:
        """Yields every dataset returned by the ``/datasets/`` endpoint for the given team

        This is a generator: the request is only issued once iteration starts.

        Parameters
        ----------
        team : Optional[str]
            Team forwarded to the request. When falsy, ``self.default_team`` is
            used as the team of each yielded dataset.

        Yields
        ------
        RemoteDataset
        One remote dataset per entry of the response
        """
        for entry in self.get("/datasets/", team=team):
            fields = {
                "name": entry["name"],
                "slug": entry["slug"],
                "team": team or self.default_team,
                "dataset_id": entry["id"],
                "image_count": entry["num_images"],
                "progress": entry["progress"],
                "client": self,
            }
            yield RemoteDataset(**fields)
Example #9
0
    def it_works_on_videos(
        darwin_client: Client,
        darwin_datasets_path: Path,
        dataset_name: str,
        dataset_slug: str,
        release_name: str,
        team_slug: str,
    ):
        """Check that ``split_video_annotations`` writes one annotation file per frame of ``test_video``.

        Three frame files (``0000000`` to ``0000002``) are expected under the
        release's ``annotations/test_video`` directory, each holding that
        frame's annotations and image metadata; a fourth file must not exist.
        """
        dataset = RemoteDataset(
            client=darwin_client,
            team=team_slug,
            name=dataset_name,
            slug=dataset_slug,
            dataset_id=1,
        )

        dataset.split_video_annotations()

        frames_dir = darwin_datasets_path.joinpath(
            team_slug, dataset_slug, "releases", release_name, "annotations",
            "test_video")
        assert frames_dir.exists()

        for frame_file in ("0000000.json", "0000001.json", "0000002.json"):
            assert (frames_dir / frame_file).exists()
        assert not (frames_dir / "0000003.json").exists()

        def polygon_annotation(*corners):
            # Annotation of class "test_class" whose polygon path visits the given (x, y) corners.
            return {
                "name": "test_class",
                "polygon": {
                    "path": [{"x": x, "y": y} for x, y in corners]
                },
            }

        # (annotation file, expected annotations, expected image filename, expected image url)
        expected_frames = [
            ("0000000.json", [polygon_annotation((0, 0), (1, 1), (1, 0))],
             "test_video/0000000.png", "frame_1.jpg"),
            ("0000001.json", [], "test_video/0000001.png", "frame_2.jpg"),
            ("0000002.json", [polygon_annotation((5, 5), (6, 6), (6, 5))],
             "test_video/0000002.png", "frame_3.jpg"),
        ]

        for frame_file, annotations, image_filename, image_url in expected_frames:
            with (frames_dir / frame_file).open() as handle:
                assert json.load(handle) == {
                    "annotations": annotations,
                    "image": {
                        "filename": image_filename,
                        "height": 1080,
                        "url": image_url,
                        "width": 1920,
                    },
                }
Example #10
0
def dataset(darwin_client: Client, team_slug: str, dataset_slug: str) -> RemoteDataset:
    """Build a ``RemoteDataset`` with id 1 whose name and slug are both ``dataset_slug``."""
    return RemoteDataset(
        client=darwin_client,
        team=team_slug,
        name=dataset_slug,
        slug=dataset_slug,
        dataset_id=1,
    )
Example #11
0
    def get_remote_dataset(
            self,
            dataset_identifier: Union[str,
                                      DatasetIdentifier]) -> RemoteDataset:
        """
        Get a remote dataset based on its identifier.

        The datasets listed for the identifier's team are searched by slug. If
        listing raises ``Unauthorized``, the dataset is fetched directly from
        ``<team_slug>/<dataset_slug>`` instead, and a team entry with an empty
        API key is added to the config when none exists yet.

        Parameters
        ----------
        dataset_identifier : Union[str, DatasetIdentifier]
            Identifier of the dataset. Can be the string version or a DatasetIdentifier object.
            A missing team slug is filled in with ``self.default_team``.

        Returns
        -------
        RemoteDataset
            Initialized dataset.

        Raises
        -------
        NotFound
            If no dataset with the given identifier was found.
        """
        identifier: DatasetIdentifier = DatasetIdentifier.parse(dataset_identifier)
        if not identifier.team_slug:
            identifier.team_slug = self.default_team

        try:
            matches: List[RemoteDataset] = []
            for candidate in self.list_remote_datasets(
                    team_slug=identifier.team_slug):
                if candidate.slug == identifier.dataset_slug:
                    matches.append(candidate)
        except Unauthorized:
            # There is a chance that we tried to access an open dataset
            open_dataset: Dict[str, Any] = cast(
                Dict[str, Any],
                self._get(f"{identifier.team_slug}/{identifier.dataset_slug}"),
            )

            # If there isn't a record of this team, create one.
            known_team = self.config.get_team(identifier.team_slug,
                                              raise_on_invalid_team=False)
            if not known_team:
                default_dir: Path = Path.home() / ".darwin" / "datasets"
                self.config.set_team(team=identifier.team_slug,
                                     api_key="",
                                     datasets_dir=str(default_dir))

            fields = {
                "name": open_dataset["name"],
                "slug": open_dataset["slug"],
                "team": identifier.team_slug,
                "dataset_id": open_dataset["id"],
                "item_count": open_dataset["num_images"] + open_dataset["num_videos"],
                "progress": 0,
                "client": self,
            }
            return RemoteDataset(**fields)

        if matches:
            return matches[0]
        raise NotFound(str(identifier))