def test_creation_from_scratch(self, client, created_entities):
    dataset = Dataset(client._conn, client._conf,
                      dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    created_entities.append(dataset)

    version = DatasetVersion(client._conn, client._conf,
                             dataset_id=dataset.id,
                             dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                             dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.id
def test_creation_from_scratch(self, client):
    name = utils.gen_str()
    dataset = client.create_dataset(name=name,
                                    dataset_type=_DatasetService.DatasetTypeEnum.PATH)

    version = client.create_dataset_version(dataset=dataset,
                                            dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                                            dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id
def test_creation_from_scratch(self, client):
    name = utils.gen_str()
    dataset = Dataset(client._conn, client._conf,
                      name=name,
                      dataset_type=_DatasetService.DatasetTypeEnum.PATH)

    version = DatasetVersion(client._conn, client._conf,
                             dataset_id=dataset.id,
                             dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                             dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version.id
def test_creation_from_scratch(self, client, created_datasets):
    dataset = Dataset(client._conn, client._conf,
                      dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
    created_datasets.append(dataset)

    version = DatasetVersion(client._conn, client._conf,
                             dataset_id=dataset.id,
                             dataset_version_info=_DatasetVersionService.QueryDatasetVersionInfo(),
                             dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
    assert version.id
def delete(self):
    """
    Deletes this dataset version.

    """
    msg = _DatasetVersionService.DeleteDatasetVersion(id=self.id)
    response = self._conn.make_proto_request(
        "DELETE",
        "/api/v1/modeldb/dataset-version/deleteDatasetVersion",
        body=msg,
    )
    self._conn.must_response(response)
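# A hedged usage sketch for delete() above, not part of the library: it assumes
# a `client` and `dataset` set up as in the tests in this module, and the
# function name is illustrative only.
def example_delete_version(client, dataset):
    version = client.create_dataset_version(
        dataset=dataset,
        dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
        dataset_type=_DatasetService.DatasetTypeEnum.PATH,
    )
    # issues DELETE to /api/v1/modeldb/dataset-version/deleteDatasetVersion
    version.delete()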
def test_get_versions(self, client):
    name = utils.gen_str()
    dataset = client.create_dataset(name=name,
                                    dataset_type=_DatasetService.DatasetTypeEnum.PATH)

    version1 = client.create_dataset_version(dataset=dataset,
                                             dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                                             dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version1.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version1.id

    version2 = client.create_dataset_version(dataset=dataset,
                                             dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                                             dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version2.dataset_type == _DatasetService.DatasetTypeEnum.PATH
    assert version2.id

    versions = client.get_all_versions_for_dataset(dataset)
    assert len(versions) == 2

    version = client.get_latest_version_for_dataset(dataset, ascending=True)
    assert version.id == version1.id
def test_creation_by_id(self, client, created_datasets):
    dataset = Dataset(client._conn, client._conf,
                      dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    created_datasets.append(dataset)

    version = DatasetVersion(client._conn, client._conf,
                             dataset_id=dataset.id,
                             dataset_version_info=_DatasetVersionService.PathDatasetVersionInfo(),
                             dataset_type=_DatasetService.DatasetTypeEnum.PATH)
    assert version.id

    same_version = DatasetVersion(client._conn, client._conf,
                                  _dataset_version_id=version.id)
    assert version.id == same_version.id
def test_creation_by_id(self, client):
    name = utils.gen_str()
    dataset = Dataset(client._conn, client._conf,
                      name=name,
                      dataset_type=_DatasetService.DatasetTypeEnum.QUERY)

    version = DatasetVersion(client._conn, client._conf,
                             dataset_id=dataset.id,
                             dataset_version_info=_DatasetVersionService.QueryDatasetVersionInfo(),
                             dataset_type=_DatasetService.DatasetTypeEnum.QUERY)
    assert version.dataset_type == _DatasetService.DatasetTypeEnum.QUERY
    assert version.id

    same_version = DatasetVersion(client._conn, client._conf,
                                  _dataset_version_id=version.id)
    assert version.id == same_version.id
def _upload_artifact(self, dataset_component_path, file_handle, part_size=_artifact_utils._64MB):
    """
    Uploads `file_handle` to ModelDB artifact store.

    Parameters
    ----------
    dataset_component_path : str
        Filepath in dataset component blob.
    file_handle : file-like
        Artifact to be uploaded.
    part_size : int, default 64 MB
        If using multipart upload, number of bytes to upload per part.

    """
    file_handle.seek(0)

    # check if multipart upload ok
    url_for_artifact = self._get_url_for_artifact(dataset_component_path, "PUT", part_num=1)

    print("uploading {} to ModelDB".format(dataset_component_path))
    if url_for_artifact.multipart_upload_ok:
        # TODO: parallelize this
        file_parts = iter(lambda: file_handle.read(part_size), b'')
        for part_num, file_part in enumerate(file_parts, start=1):
            print("uploading part {}".format(part_num), end='\r')

            # get presigned URL
            url = self._get_url_for_artifact(dataset_component_path, "PUT", part_num=part_num).url

            # wrap file part into bytestream to avoid OverflowError
            #     Passing a bytestring >2 GB (num bytes > max val of int32) directly to
            #     ``requests`` will overwhelm CPython's SSL lib when it tries to sign the
            #     payload. But passing a buffered bytestream instead of the raw bytestring
            #     indicates to ``requests`` that it should perform a streaming upload via
            #     HTTP/1.1 chunked transfer encoding and avoid this issue.
            #     https://github.com/psf/requests/issues/2717
            part_stream = six.BytesIO(file_part)

            # upload part
            response = _utils.make_request("PUT", url, self._conn, data=part_stream)
            _utils.raise_for_http_error(response)

            # commit part
            url = "{}://{}/api/v1/modeldb/dataset-version/commitVersionedDatasetBlobArtifactPart".format(
                self._conn.scheme,
                self._conn.socket,
            )
            msg = _DatasetVersionService.CommitVersionedDatasetBlobArtifactPart(
                dataset_version_id=self.id,
                path_dataset_component_blob_path=dataset_component_path,
            )
            msg.artifact_part.part_number = part_num
            msg.artifact_part.etag = response.headers['ETag']
            data = _utils.proto_to_json(msg)
            response = _utils.make_request("POST", url, self._conn, json=data)
            _utils.raise_for_http_error(response)
        print()

        # complete upload
        url = "{}://{}/api/v1/modeldb/dataset-version/commitMultipartVersionedDatasetBlobArtifact".format(
            self._conn.scheme,
            self._conn.socket,
        )
        msg = _DatasetVersionService.CommitMultipartVersionedDatasetBlobArtifact(
            dataset_version_id=self.id,
            path_dataset_component_blob_path=dataset_component_path,
        )
        data = _utils.proto_to_json(msg)
        response = _utils.make_request("POST", url, self._conn, json=data)
        _utils.raise_for_http_error(response)
    else:
        # upload full artifact
        if url_for_artifact.fields:
            # if fields were returned by backend, make a POST request and supply them as form fields
            response = _utils.make_request(
                "POST", url_for_artifact.url, self._conn,
                # requests uses the `files` parameter for sending multipart/form-data POSTs.
                #     https://stackoverflow.com/a/12385661/8651995
                # the file contents must be the final form field
                #     https://docs.aws.amazon.com/AmazonS3/latest/dev/HTTPPOSTForms.html#HTTPPOSTFormFields
                files=list(url_for_artifact.fields.items()) + [('file', file_handle)],
            )
        else:
            response = _utils.make_request("PUT", url_for_artifact.url, self._conn, data=file_handle)
        _utils.raise_for_http_error(response)

    print("upload complete")
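# A minimal standalone sketch (not part of this module) of the part-by-part
# streaming pattern used in _upload_artifact() above, assuming only the
# ``requests`` library. `make_presigned_url` is a hypothetical stand-in for a
# presigned-URL fetch like _get_url_for_artifact(), and the file path is a
# placeholder. Wrapping each part in io.BytesIO makes ``requests`` stream the
# body with chunked transfer encoding, avoiding the OverflowError that raw
# bytestrings >2 GB trigger in CPython's SSL layer
# (https://github.com/psf/requests/issues/2717).
import io

import requests

def upload_in_parts(filepath, make_presigned_url, part_size=64 * 1024 * 1024):
    etags = {}
    with open(filepath, 'rb') as f:
        # read the file in fixed-size parts until an empty read signals EOF
        for part_num, part in enumerate(iter(lambda: f.read(part_size), b''), start=1):
            url = make_presigned_url(part_num)  # hypothetical helper
            response = requests.put(url, data=io.BytesIO(part))
            response.raise_for_status()
            etags[part_num] = response.headers.get('ETag')  # needed to commit each part
    return etags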
def __init__(self, conn, conf):
    super(DatasetVersions, self).__init__(
        conn, conf,
        _DatasetVersionService.FindDatasetVersions(),
    )
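# A hedged usage sketch, not part of the library: the DatasetVersions
# collection is built from a connection and config the same way Dataset and
# DatasetVersion are in the tests above; query and iteration behavior comes
# from the base class and its FindDatasetVersions message, not shown here.
def example_versions_collection(client):
    return DatasetVersions(client._conn, client._conf)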