# ----- Example 1 -----
class TestVersionControlClient:
    """Tests for the version-control operations exposed by DatasetClient."""

    gas_client = GAS("Accesskey-********************************")
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=DEFAULT_IS_PUBLIC,
    )

    def test_squash_and_merge(self, mocker):
        """squash_and_merge should POST the merge request and return the new draft number."""
        expected_payload = {
            "title": "squash_merge-1",
            "sourceBranchName": "branch-1",
            "targetBranchName": "branch-2",
            "strategy": "abort",
        }
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data={"draftNumber": 2}),
        )
        returned_draft_number = self.dataset_client.squash_and_merge(
            "squash_merge-1",
            source_branch_name="branch-1",
            target_branch_name="branch-2",
            strategy="abort",
        )
        open_api_do.assert_called_once_with(
            "POST",
            "squashAndMerge",
            self.dataset_client.dataset_id,
            json=expected_payload,
        )
        assert returned_draft_number == 2
# ----- Example 2 -----
class TestJobMixin:
    """Tests for the job helpers reached via DatasetClient.squash_and_merge."""

    gas_client = GAS("Accesskey-********************************")
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=False,
    )

    def test__create_job(self, mocker, mock_create_job):
        """_create_job should POST the job definition and return the enriched response."""
        payload = {
            "title": "test->main(abort)",
            "jobType": "squashAndMerge",
            "arguments": {"title": "draft-1"},
            "description": "12",
        }
        open_api_do, response_data = mock_create_job(mocker)
        # The mocked response is extended with the fields the client echoes back.
        response_data.update(
            title=payload["title"],
            arguments=payload["arguments"],
            status="QUEUING",
            description=payload["description"],
        )
        created = self.dataset_client.squash_and_merge._create_job(
            payload["title"], payload["arguments"], payload["description"]
        )
        assert created == response_data
        open_api_do.assert_called_once_with(
            "POST", "jobs", self.dataset_client.dataset_id, json=payload
        )

    def test__get_job(self, mocker, mock_get_job):
        """_get_job should GET one job by id, filtered by job type."""
        open_api_do, response_data = mock_get_job(mocker)
        fetched = self.dataset_client.squash_and_merge._get_job("123")
        assert fetched == response_data
        open_api_do.assert_called_once_with(
            "GET",
            "jobs/123",
            self.dataset_client.dataset_id,
            params={"jobType": "squashAndMerge"},
        )

    def test__list_jobs(self, mocker, mock_list_jobs):
        """_list_jobs should GET the job listing with paging parameters."""
        query = {
            "jobType": "squashAndMerge",
            "status": None,
            "offset": 0,
            "limit": 128,
        }
        open_api_do, response_data = mock_list_jobs(mocker)
        listed = self.dataset_client.squash_and_merge._list_jobs(
            query["status"], query["offset"], query["limit"]
        )
        assert listed == response_data
        open_api_do.assert_called_once_with(
            "GET", "jobs", self.dataset_client.dataset_id, params=query
        )
# ----- Example 3 -----
class TestVersionControlMixin:
    """Fixture holder for version-control mixin tests.

    Only builds a GAS client and a DatasetClient pinned to the default
    branch at the root commit; test methods are not visible in this chunk.
    """

    gas_client = GAS("Accesskey-********************************")
    # Client under test, pinned to DEFAULT_BRANCH at ROOT_COMMIT_ID, not public.
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=False,
    )
# ----- Example 4 -----
class TestSquashAndMergeJob:
    """Tests for SquashAndMergeJob.result in queued and finished states."""

    gas_client = GAS("Accesskey-********************************")
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=True,
    )
    # A job still waiting in the queue: no timestamps, no result yet.
    squash_and_merge_job1 = SquashAndMergeJob(
        client=dataset_client.squash_and_merge._client,
        dataset_id=dataset_client.dataset_id,
        job_updater=dataset_client.squash_and_merge._get_job,
        draft_getter=dataset_client.get_draft,
        title="test->main(abort)",
        job_id="234",
        arguments={"title": "draft-1"},
        created_at=1,
        started_at=None,
        finished_at=None,
        status="QUEUEING",
        error_message="",
        result=None,
        description="12",
    )
    # A job that finished successfully and produced draft number 1.
    squash_and_merge_job2 = SquashAndMergeJob(
        client=dataset_client.squash_and_merge._client,
        dataset_id=dataset_client.dataset_id,
        job_updater=dataset_client.squash_and_merge._get_job,
        draft_getter=dataset_client.get_draft,
        title="test->main(abort)",
        job_id="123",
        arguments={"title": "draft-1"},
        created_at=1,
        started_at=2,
        finished_at=3,
        status="SUCCESS",
        error_message="",
        result={"draftNumber": 1},
        description="12",
    )

    def test_result(self, mocker, mock_list_drafts):
        """A queued job has no result; a successful one resolves to a listed draft."""
        _, drafts_list = mock_list_drafts(mocker, "main")
        assert self.squash_and_merge_job1.result is None
        listed_numbers = [draft.number for draft in drafts_list]
        assert self.squash_and_merge_job2.result.number in listed_numbers
# ----- Example 5 -----
class TestJob:
    """Tests for Job.update polling behavior."""

    gas_client = GAS("Accesskey-********************************")
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=True,
    )
    # A job fixture that update() refreshes from the mocked server responses.
    job = Job(
        client=dataset_client.squash_and_merge._client,
        dataset_id=dataset_client.dataset_id,
        job_updater=dataset_client.squash_and_merge._get_job,
        title="test->main(abort)",
        job_id="123",
        arguments={"title": "draft-1"},
        created_at=1,
        started_at=2,
        finished_at=3,
        status="SUCCESS",
        error_message="",
        result={"draftNumber": 3},
        description="12",
    )

    def test_update(self, mocker, mock_get_job):
        """update(until_complete=True) polls until the job reaches its final state."""
        open_api_do, responses = mock_get_job(mocker, until_complete=True)
        # The mock yields exactly two responses; the second is the final state.
        _initial, final_response = responses
        self.job.update(until_complete=True)
        assert self.job.started_at == final_response["startedAt"]
        assert self.job.finished_at == final_response["finishedAt"]
        assert self.job.status == final_response["status"]
        assert self.job.error_message == final_response["errorMessage"]
        assert self.job._result == final_response.get("result")
        open_api_do.assert_called_with(
            "GET", f"jobs/{self.job.job_id}", self.dataset_client.dataset_id
        )
# ----- Example 6 -----
    def test_upload_dataset(self, mocker):
        """upload_dataset should reuse an open draft when one exists, otherwise
        create a new one, and only call checkout when a draft is reused.

        Covers four scenarios: main branch with/without an open draft, and a
        custom ("dev") branch with/without an open draft.
        """
        dataset = Dataset("test")
        dataset.load_catalog(
            os.path.join(
                os.path.dirname(__file__),
                "..",
                "..",
                "opendataset",
                "HeadPoseImage",
                "catalog.json",
            ))
        # Five segments so the per-segment upload count can be asserted below.
        for i in range(5):
            dataset.create_segment(str(i))

        # upload the dataset in main branch containing no draft
        get_dataset = mocker.patch(
            f"{gas.__name__}.GAS.get_dataset",
            return_value=DatasetClient(
                "test",
                "12345",
                self.gas_client,
                status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
                alias="",
                is_public=False,
            ),
        )
        checkout = mocker.patch(f"{gas.__name__}.DatasetClient.checkout")
        list_drafts = mocker.patch(f"{gas.__name__}.DatasetClient.list_drafts",
                                   return_value=[])
        create_draft = mocker.patch(
            f"{gas.__name__}.DatasetClient.create_draft")
        upload_catalog = mocker.patch(
            f"{gas.__name__}.DatasetClient.upload_catalog")
        update_notes = mocker.patch(
            f"{gas.__name__}.DatasetClient.update_notes")
        _upload_segment = mocker.patch(
            f"{gas.__name__}.DatasetClient._upload_segment")

        self.gas_client.upload_dataset(dataset)
        # No open draft: a new draft is created and checkout is never called.
        assert not checkout.called
        get_dataset.assert_called_once_with(dataset.name, False)
        list_drafts.assert_called_once_with(branch_name=DEFAULT_BRANCH)
        create_draft.assert_called_once_with(
            'Draft autogenerated by "GAS.upload_dataset"',
            branch_name=DEFAULT_BRANCH)
        upload_catalog.assert_called_once_with(dataset.catalog)
        update_notes.assert_called_once_with(**dataset.notes)
        # One upload per segment created above.
        assert _upload_segment.call_count == 5

        # upload the dataset in main branch containing a draft
        list_drafts = mocker.patch(
            f"{gas.__name__}.DatasetClient.list_drafts",
            return_value=[
                Draft(
                    1,
                    "title",
                    DEFAULT_BRANCH,
                    "OPEN",
                    "4c564ea07f4e47679ec8c63d238bb3a1",
                    User("test", 1636967807),
                    1637223060,
                )
            ],
        )
        checkout = mocker.patch(f"{gas.__name__}.DatasetClient.checkout")
        self.gas_client.upload_dataset(dataset)
        # An open draft exists: it is checked out instead of creating a new one.
        list_drafts.assert_called_once_with(branch_name=DEFAULT_BRANCH)
        checkout.assert_called_once_with(draft_number=1)

        # upload the dataset in dev branch containing no draft
        list_drafts = mocker.patch(
            f"{gas.__name__}.DatasetClient.list_drafts",
            return_value=[],
        )
        checkout = mocker.patch(f"{gas.__name__}.DatasetClient.checkout")
        create_draft = mocker.patch(
            f"{gas.__name__}.DatasetClient.create_draft")
        self.gas_client.upload_dataset(dataset, branch_name="dev")
        assert not checkout.called
        list_drafts.assert_called_once_with(branch_name="dev")
        create_draft.assert_called_once_with(
            'Draft autogenerated by "GAS.upload_dataset"', branch_name="dev")

        # upload the dataset in dev branch containing a draft
        list_drafts = mocker.patch(
            f"{gas.__name__}.DatasetClient.list_drafts",
            return_value=[
                Draft(
                    1,
                    "title",
                    "dev",
                    "OPEN",
                    "4c564ea07f4e47679ec8c63d238bb3a1",
                    User("test", 1636967807),
                    1637223060,
                )
            ],
        )
        checkout = mocker.patch(f"{gas.__name__}.DatasetClient.checkout")
        self.gas_client.upload_dataset(dataset, branch_name="dev")
        list_drafts.assert_called_once_with(branch_name="dev")
        checkout.assert_called_once_with(draft_number=1)
# ----- Example 7 -----
class TestDatasetClientBase:
    """Tests for segment, notes, catalog, cache and statistics helpers of DatasetClient.

    NOTE(review): several tests call ``self.dataset_client._status.checkout(...)``,
    mutating shared class-level state — test order within this class matters.
    """

    gas_client = GAS("Accesskey-********************************")
    # Client under test, initially pinned to DEFAULT_BRANCH at ROOT_COMMIT_ID.
    dataset_client = DatasetClient(
        "test_dataset",
        "12345",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=False,
    )
    # Second client used as the copy source in test__copy_segment.
    source_dataset_client = DatasetClient(
        "source_dataset",
        "544321",
        gas_client,
        status=Status(DEFAULT_BRANCH, commit_id=ROOT_COMMIT_ID),
        alias="",
        is_public=False,
    )

    def test__create_segment(self, mocker):
        """_create_segment should POST the segment name plus current status info."""
        post_data = {"name": "train"}
        post_data.update(self.dataset_client._status.get_status_info())
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        self.dataset_client._create_segment("train")
        open_api_do.assert_called_once_with("POST",
                                            "segments",
                                            self.dataset_client.dataset_id,
                                            json=post_data)

    def test__list_segment(self, mocker):
        """_list_segments should GET the segment listing with default paging."""
        params = self.dataset_client._status.get_status_info()
        offset, limit = 0, 128
        params["offset"] = offset
        params["limit"] = limit
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        self.dataset_client._list_segments()
        open_api_do.assert_called_once_with("GET",
                                            "segments",
                                            self.dataset_client._dataset_id,
                                            params=params)

    def test__generate_segment_names(self, mocker):
        """_generate_segment_names should yield the names from the listing response."""
        params = self.dataset_client._status.get_status_info()
        offset, limit = 0, 128
        params["offset"] = offset
        params["limit"] = limit
        response_data = {
            "offset":
            0,
            "recordSize":
            2,
            "totalCount":
            2,
            "segments": [
                {
                    "name": "test",
                    "description": ""
                },
                {
                    "name": "train",
                    "description": ""
                },
            ],
        }
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data=response_data),
        )
        assert list(self.dataset_client._generate_segment_names()) == [
            segment["name"] for segment in response_data["segments"]
        ]
        open_api_do.assert_called_once_with("GET",
                                            "segments",
                                            self.dataset_client._dataset_id,
                                            params=params)

    def test__copy_segment(self, mocker):
        """_copy_segment should reject bad parameters and POST a copy request."""
        # Switch to a draft; copy is a write operation.
        self.dataset_client._status.checkout(draft_number=1)
        source_name, target_name = "default", "train"
        # "move" is not a valid copy strategy.
        with pytest.raises(InvalidParamsError):
            self.dataset_client._copy_segment(source_name,
                                              target_name,
                                              source_client=None,
                                              strategy="move")

        # Copying a segment onto itself (no source client) must fail.
        with pytest.raises(ValueError):
            self.dataset_client._copy_segment(source_name,
                                              source_name,
                                              source_client=None)

        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )

        # The source descriptor carries the source dataset id and its status.
        source = {"segmentName": source_name}
        source["id"] = self.source_dataset_client.dataset_id
        source.update(self.source_dataset_client.status.get_status_info())
        post_data = {
            "strategy": "abort",
            "source": source,
            "segmentName": target_name,
        }
        post_data.update(self.dataset_client._status.get_status_info())
        self.dataset_client._copy_segment(
            source_name, target_name, source_client=self.source_dataset_client)
        open_api_do.assert_called_once_with("POST",
                                            "segments?copy",
                                            self.dataset_client._dataset_id,
                                            json=post_data)

    def test__move_segment(self, mocker):
        """_move_segment should reject a bad strategy and POST a move request."""
        self.dataset_client._status.checkout(draft_number=1)
        source_name, target_name = "default", "train"
        # "move" is not a valid strategy value for this endpoint.
        with pytest.raises(InvalidParamsError):
            self.dataset_client._move_segment(source_name,
                                              target_name,
                                              strategy="move")

        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        post_data = {
            "strategy": "abort",
            "source": {
                "segmentName": source_name
            },
            "segmentName": target_name,
        }
        post_data.update(self.dataset_client._status.get_status_info())
        self.dataset_client._move_segment(source_name, target_name)
        open_api_do.assert_called_once_with("POST",
                                            "segments?move",
                                            self.dataset_client._dataset_id,
                                            json=post_data)

    def test_enable_cache(
        self,
        mocker,
        mock_list_segments,
        mock_list_data_details,
        mock_get_total_size,
        tmp_path,
        caplog,
    ):
        """enable_cache requires a commit, warns on low disk space, and caches
        downloaded data so repeated traversals hit local files."""
        # Caching on a draft is not allowed.
        self.dataset_client._status.checkout(draft_number=1)
        with pytest.raises(StatusError):
            self.dataset_client.enable_cache()

        self.dataset_client._status.checkout(commit_id="commit-1")

        # Mocked total size exceeds free space -> a warning is logged.
        _, response_data = mock_get_total_size(mocker, True)
        # NOTE(review): free_storage is computed but never used; presumably the
        # full log line embeds it — only the message suffix is asserted below.
        _, _, free_storage = shutil.disk_usage(tempfile.gettempdir())
        self.dataset_client.enable_cache()
        log_message = (
            "bytes left on device, "
            f'less than the dataset size {response_data["totalSize"]} bytes.\n '
            "Please be aware that there is not enough space to cache the entire dataset."
        )
        assert log_message in caplog.text

        mock_get_total_size(mocker)
        cache_path = tmp_path / "cache_data"
        dataset_cache_path = cache_path / self.dataset_client.dataset_id
        self.dataset_client.enable_cache(cache_path)
        # NOTE(review): prefer `is True` / bare assert over `== True` (PEP 8 E712).
        assert self.dataset_client.cache_enabled == True
        assert self.dataset_client._cache_path == str(dataset_cache_path)

        _, list_segments = mock_list_segments(mocker)
        segment_name = list_segments["segments"][0]["name"]
        segment = self.dataset_client.get_segment(segment_name)
        mock_list_data_details(mocker)
        segment_data = segment.list_data()
        urlopen = mocker.patch(
            f"{file.__name__}.RemoteFileMixin._urlopen",
            return_value=mock_response(read=lambda *args: bytes(1)),
        )

        segment_cache_path = (dataset_cache_path /
                              self.dataset_client.status.commit_id /
                              segment_name)

        # Traverse the segment for the first time.
        for data in segment_data:
            assert data.cache_path == str(segment_cache_path / data.path)
            data.open()

        local_open = mocker.patch(
            f"builtins.open",
            return_value=bytes(1),
        )
        segment_length = len(segment_data)
        # Traverse the segment for 3 times using cached data.
        epoch = 3
        for i in range(epoch):
            for data in segment_data:
                assert data.cache_path == str(segment_cache_path / data.path)
                data.open()
                # local_open.assert_called_once()
        # Cached reads use local open; the remote fetch happened only once per file.
        assert local_open.call_count == segment_length * epoch
        assert urlopen.call_count == segment_length

        # Checking out to a draft disables the cache but keeps the path.
        self.dataset_client._status.checkout(draft_number=1)
        # NOTE(review): prefer `is False` / `assert not ...` over `== False`.
        assert self.dataset_client.cache_enabled == False
        assert self.dataset_client._cache_path == str(dataset_cache_path)

    def test_update_notes(self, mocker):
        """update_notes should PATCH only the provided note fields plus status info."""
        self.dataset_client._status.checkout(draft_number=1)

        patch_data = {"binPointCloudFields": None}
        patch_data.update(self.dataset_client._status.get_status_info())

        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        self.dataset_client.update_notes(bin_point_cloud_fields=None)
        open_api_do.assert_called_once_with("PATCH",
                                            "notes",
                                            self.dataset_client._dataset_id,
                                            json=patch_data)

        # Second call: multiple fields at once.
        patch_data = {
            "isContinuous": True,
            "binPointCloudFields": ["X", "Y", "Z", "Intensity", "Ring"],
        }
        patch_data.update(self.dataset_client._status.get_status_info())
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        self.dataset_client.update_notes(
            is_continuous=True,
            bin_point_cloud_fields=["X", "Y", "Z", "Intensity", "Ring"])
        open_api_do.assert_called_once_with("PATCH",
                                            "notes",
                                            self.dataset_client._dataset_id,
                                            json=patch_data)

    def test_get_notes(self, mocker):
        """get_notes should GET the notes and parse them via Notes.loads."""
        params = self.dataset_client._status.get_status_info()
        response_data = {
            "isContinuous": True,
            "binPointCloudFields": ["X", "Y", "Z", "Intensity", "Ring"],
        }
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data=response_data),
        )
        assert self.dataset_client.get_notes() == Notes.loads(response_data)
        open_api_do.assert_called_once_with("GET",
                                            "notes",
                                            self.dataset_client._dataset_id,
                                            params=params)

    def test_list_segment(self, mocker):
        """list_segment_names should expose the names from the listing response."""
        response_data = {
            "offset":
            0,
            "recordSize":
            2,
            "totalCount":
            2,
            "segments": [
                {
                    "name": "test",
                    "description": ""
                },
                {
                    "name": "train",
                    "description": ""
                },
            ],
        }
        mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data=response_data),
        )
        assert list(self.dataset_client.list_segment_names()) == [
            segment["name"] for segment in response_data["segments"]
        ]

    def test_get_catelog(self, mocker):
        """get_catalog should GET the catalog and parse it via Catalog.loads.

        NOTE(review): method name misspells "catalog"; renaming would change
        the collected test id, so it is only flagged here.
        """
        params = self.dataset_client._status.get_status_info()
        response_data = {
            "catalog": {
                "CLASSIFICATION": {
                    "categories": [{
                        "name": "cat"
                    }]
                }
            }
        }
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data=response_data),
        )
        assert self.dataset_client.get_catalog() == Catalog.loads(
            response_data["catalog"])
        open_api_do.assert_called_once_with("GET",
                                            "labels/catalogs",
                                            self.dataset_client._dataset_id,
                                            params=params)

    def test_upload_catalog(self, mocker):
        """upload_catalog should PUT the dumped catalog plus status info."""
        self.dataset_client._status.checkout(draft_number=1)
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        catalog = Catalog.loads(
            {"CLASSIFICATION": {
                "categories": [{
                    "name": "cat"
                }]
            }})
        self.dataset_client.upload_catalog(catalog)
        put_data = {"catalog": catalog.dumps()}
        put_data.update(self.dataset_client._status.get_status_info())
        open_api_do.assert_called_once_with("PUT",
                                            "labels/catalogs",
                                            self.dataset_client._dataset_id,
                                            json=put_data)

    def test_delete_segment(self, mocker):
        """delete_segment should DELETE by segment name plus status info."""
        self.dataset_client._status.checkout(draft_number=1)
        delete_data = {"segmentName": "train"}
        delete_data.update(self.dataset_client._status.get_status_info())
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(),
        )
        self.dataset_client.delete_segment("train")
        open_api_do.assert_called_once_with("DELETE",
                                            "segments",
                                            self.dataset_client._dataset_id,
                                            json=delete_data)

    def test_get_label_statistics(self, mocker):
        """get_label_statistics should GET the statistics and wrap them in Statistics."""
        params = self.dataset_client._status.get_status_info()
        response_data = {
            "labelStatistics": {
                "BOX2D": {
                    "quantity":
                    10,
                    "categories": [{
                        "name":
                        "vehicles.bike",
                        "quantity":
                        10,
                        "attributes": [{
                            "name": "trafficLightColor",
                            "enum": ["none", "red", "yellow"],
                            "quantities": [5, 3, 2],
                        }],
                    }],
                }
            }
        }
        open_api_do = mocker.patch(
            f"{gas.__name__}.Client.open_api_do",
            return_value=mock_response(data=response_data),
        )
        statistics1 = self.dataset_client.get_label_statistics()
        open_api_do.assert_called_once_with("GET",
                                            "labels/statistics",
                                            self.dataset_client.dataset_id,
                                            params=params)
        assert statistics1 == Statistics(response_data["labelStatistics"])

    def test_get_total_size(self, mocker, mock_get_total_size):
        """get_total_size should GET the size for the checked-out commit."""
        self.dataset_client._status.checkout(commit_id="commit-1")
        params = {"commit": self.dataset_client._status.commit_id}
        get_total_size, response_data = mock_get_total_size(mocker)
        total_size = self.dataset_client.get_total_size()
        get_total_size.assert_called_once_with("GET",
                                               "total-size",
                                               self.dataset_client.dataset_id,
                                               params=params)
        assert total_size == response_data["totalSize"]