Esempio n. 1
0
 def test_creation_from_scratch(self, client, created_entities):
     """A dataset and a version can be created via the low-level entities."""
     new_dataset = Dataset(client._conn, client._conf,
                           dataset_type=_DatasetService.DatasetTypeEnum.PATH)
     # hand off to the fixture for post-test cleanup
     created_entities.append(new_dataset)

     new_version = DatasetVersion(
         client._conn, client._conf,
         dataset_id=new_dataset.id,
         dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
         dataset_type=_DatasetService.DatasetTypeEnum.PATH,
     )
     assert new_version.id
Esempio n. 2
0
    def test_creation_from_scratch(self, client):
        """A dataset and a version can be created through the client API."""
        dataset = client.create_dataset(
            name=utils.gen_str(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )

        version = client.create_dataset_version(
            dataset=dataset,
            dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )

        assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
        assert version.id
Esempio n. 3
0
 def test_creation_from_scratch(self, client):
     """A named dataset and a version can be created via the entities."""
     new_dataset = Dataset(
         client._conn, client._conf,
         name=utils.gen_str(),
         dataset_type=_DatasetService.DatasetTypeEnum.PATH,
     )
     new_version = DatasetVersion(
         client._conn, client._conf,
         dataset_id=new_dataset.id,
         dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
         dataset_type=_DatasetService.DatasetTypeEnum.PATH,
     )
     assert new_version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
     assert new_version.id
Esempio n. 4
0
    def test_creation_from_scratch(self, client, created_datasets):
        """A QUERY-type dataset and a version can be created via the entities.

        The dataset is appended to ``created_datasets`` so the fixture can
        delete it after the test.
        """
        dataset = Dataset(client._conn, client._conf,
                          # NOTE: was `DatasetTypeEnum.DatasetType.QUERY`; both
                          # spellings resolve to the same protobuf enum value,
                          # but the short form matches the version below and
                          # every other usage in this file
                          dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
        created_datasets.append(dataset)

        version = DatasetVersion(client._conn, client._conf,
                                 dataset_id=dataset.id,
                                 dataset_version_info=_DatasetVersionService.QueryDatasetVersionInfo(),
                                 dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
        assert version.id
Esempio n. 5
0
    def delete(self):
        """
        Deletes this dataset version.

        Issues a DELETE request against the backend and raises if the
        response indicates failure.

        """
        response = self._conn.make_proto_request(
            "DELETE",
            "/api/v1/modeldb/dataset-version/deleteDatasetVersion",
            body=_DatasetVersionService.DeleteDatasetVersion(id=self.id),
        )
        self._conn.must_response(response)
Esempio n. 6
0
    def test_get_versions(self, client):
        """All versions of a dataset can be listed, and the latest fetched."""
        dataset = client.create_dataset(
            name=utils.gen_str(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )

        created = []
        for _ in range(2):
            version = client.create_dataset_version(
                dataset=dataset,
                dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                dataset_type=_DatasetService.DatasetTypeEnum.PATH,
            )
            assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
            assert version.id
            created.append(version)

        assert len(client.get_all_versions_for_dataset(dataset)) == 2

        # ascending order puts the earliest version first
        earliest = client.get_latest_version_for_dataset(dataset, ascending=True)
        assert earliest.id == created[0].id
Esempio n. 7
0
    def test_creation_by_id(self, client, created_datasets):
        """An existing version can be re-fetched by its ID."""
        dataset = Dataset(client._conn, client._conf,
                          dataset_type=_DatasetService.DatasetTypeEnum.PATH)
        # hand off to the fixture for post-test cleanup
        created_datasets.append(dataset)

        original = DatasetVersion(
            client._conn, client._conf,
            dataset_id=dataset.id,
            dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
            dataset_type=_DatasetService.DatasetTypeEnum.PATH,
        )
        assert original.id

        refetched = DatasetVersion(client._conn, client._conf,
                                   _dataset_version_id=original.id)
        assert original.id == refetched.id
Esempio n. 8
0
    def test_creation_by_id(self, client):
        """An existing QUERY-type version can be re-fetched by its ID."""
        dataset = Dataset(
            client._conn, client._conf,
            name=utils.gen_str(),
            dataset_type=_DatasetService.DatasetTypeEnum.QUERY,
        )

        original = DatasetVersion(
            client._conn, client._conf,
            dataset_id=dataset.id,
            dataset_version_info=_DatasetVersionService.QueryDatasetVersionInfo(),
            dataset_type=_DatasetService.DatasetTypeEnum.QUERY,
        )
        assert original.dataset_type == _DatasetService.DatasetTypeEnum.QUERY
        assert original.id

        refetched = DatasetVersion(client._conn, client._conf,
                                   _dataset_version_id=original.id)
        assert original.id == refetched.id
Esempio n. 9
0
    def _upload_artifact(self,
                         dataset_component_path,
                         file_handle,
                         part_size=_artifact_utils._64MB):
        """
        Uploads `file_handle` to ModelDB artifact store.

        If the backend reports that multipart upload is supported, the file
        is read in `part_size` chunks: each chunk is PUT to its own presigned
        URL, its ETag is committed to the backend, and a final completion
        request is sent. Otherwise, the whole file is uploaded at once —
        either as a multipart/form-data POST (if the backend supplied form
        fields) or as a plain PUT.

        Parameters
        ----------
        dataset_component_path : str
            Filepath in dataset component blob.
        file_handle : file-like
            Artifact to be uploaded. Must be seekable and opened in binary
            mode (chunks are compared against ``b''``).
        part_size : int, default 64 MB
            If using multipart upload, number of bytes to upload per part.

        """
        # rewind in case the caller has already read from the handle
        file_handle.seek(0)

        # check if multipart upload ok
        url_for_artifact = self._get_url_for_artifact(dataset_component_path,
                                                      "PUT",
                                                      part_num=1)

        print("uploading {} to ModelDB".format(dataset_component_path))
        if url_for_artifact.multipart_upload_ok:
            # TODO: parallelize this
            # lazily yields successive chunks; stops when read() returns b''
            file_parts = iter(lambda: file_handle.read(part_size), b'')
            for part_num, file_part in enumerate(file_parts, start=1):
                print("uploading part {}".format(part_num), end='\r')

                # get presigned URL (one per part; part 1's URL is re-fetched)
                url = self._get_url_for_artifact(dataset_component_path,
                                                 "PUT",
                                                 part_num=part_num).url

                # wrap file part into bytestream to avoid OverflowError
                #     Passing a bytestring >2 GB (num bytes > max val of int32) directly to
                #     ``requests`` will overwhelm CPython's SSL lib when it tries to sign the
                #     payload. But passing a buffered bytestream instead of the raw bytestring
                #     indicates to ``requests`` that it should perform a streaming upload via
                #     HTTP/1.1 chunked transfer encoding and avoid this issue.
                #     https://github.com/psf/requests/issues/2717
                part_stream = six.BytesIO(file_part)

                # upload part
                response = _utils.make_request("PUT",
                                               url,
                                               self._conn,
                                               data=part_stream)
                _utils.raise_for_http_error(response)

                # commit part: report this part's number and ETag to the backend
                url = "{}://{}/api/v1/modeldb/dataset-version/commitVersionedDatasetBlobArtifactPart".format(
                    self._conn.scheme,
                    self._conn.socket,
                )
                msg = _DatasetVersionService.CommitVersionedDatasetBlobArtifactPart(
                    dataset_version_id=self.id,
                    path_dataset_component_blob_path=dataset_component_path,
                )
                msg.artifact_part.part_number = part_num
                # ETag comes from the artifact store's response to the part PUT
                msg.artifact_part.etag = response.headers['ETag']
                data = _utils.proto_to_json(msg)
                response = _utils.make_request("POST",
                                               url,
                                               self._conn,
                                               json=data)
                _utils.raise_for_http_error(response)
            # move past the \r-overwritten progress line
            print()

            # complete upload: tell the backend all parts have been committed
            url = "{}://{}/api/v1/modeldb/dataset-version/commitMultipartVersionedDatasetBlobArtifact".format(
                self._conn.scheme,
                self._conn.socket,
            )
            msg = _DatasetVersionService.CommitMultipartVersionedDatasetBlobArtifact(
                dataset_version_id=self.id,
                path_dataset_component_blob_path=dataset_component_path,
            )
            data = _utils.proto_to_json(msg)
            response = _utils.make_request("POST", url, self._conn, json=data)
            _utils.raise_for_http_error(response)
        else:
            # upload full artifact
            if url_for_artifact.fields:
                # if fields were returned by backend, make a POST request and supply them as form fields
                response = _utils.make_request(
                    "POST",
                    url_for_artifact.url,
                    self._conn,
                    # requests uses the `files` parameter for sending multipart/form-data POSTs.
                    #     https://stackoverflow.com/a/12385661/8651995
                    # the file contents must be the final form field
                    #     https://docs.aws.amazon.com/AmazonS3/latest/dev/HTTPPOSTForms.html#HTTPPOSTFormFields
                    files=list(url_for_artifact.fields.items()) +
                    [('file', file_handle)],
                )
            else:
                response = _utils.make_request("PUT",
                                               url_for_artifact.url,
                                               self._conn,
                                               data=file_handle)
            _utils.raise_for_http_error(response)

        print("upload complete")
0
 def __init__(self, conn, conf):
     # seed the base collection with an empty FindDatasetVersions query
     find_msg = _DatasetVersionService.FindDatasetVersions()
     super(DatasetVersions, self).__init__(conn, conf, find_msg)