def test_artifact_hash(self):
    # Do not change this test case without coordinating with
    # Artifact API's ArtifactHashComputer.
    first_file = ArtifactFileData(
        file_path="to/file1",
        file_hash="c38444d2ccff1a7aab3d323fb6234e1b4f0a81ac",
        type="S3",
        size=5234,
        metadata={
            "location": "s3://bucket/path/to/file1",
            "last_modification": "2021-08-09 10:22:53",
        },
    )
    second_file = ArtifactFileData(
        file_path="from/file2",
        file_hash="4347d0f8ba661234a8eadc005e2e1d1b646c9682",
        type="S3",
        metadata={
            "location": "s3://bucket/path/to/file2",
            "last_modification": "2021-08-09 10:32:12",
        },
    )
    expected_hash = (
        "56e64245b1d4915ff27b306c8077cd4f9ce1b31233c690a93ebc38a1b737a9ea")
    # The artifact hash must not depend on the order in which files are listed.
    for ordering in ([first_file, second_file], [second_file, first_file]):
        self.assertEqual(expected_hash, FileHasher.get_artifact_hash(ordering))
def setUp(self):
    """Build a run whose fake backend already holds a two-file artifact, plus a stub driver.

    NOTE(review): attribute names assigned here (self.exp, self.path, self.artifact_files,
    self.test_artifact_driver, ...) look like the fixture contract used by sibling test
    methods — do not rename without checking the rest of the test class.
    """
    self.monkeypatch = MonkeyPatch()
    self.wait = self._random_wait()
    # Operation processor is mocked so no real backend traffic happens.
    self.op_processor = MagicMock()
    self.exp = self._create_run(processor=self.op_processor)
    self.path = self._random_path()
    self.path_str = path_to_str(self.path)
    # This is the SHA-256 of empty input, reused as a placeholder hash for both
    # the artifact itself and each of its files.
    self.artifact_hash = (
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
    self.artifact_files = [
        ArtifactFileData(
            file_path="fname.txt",
            file_hash=
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            type="test",
            size=213,
            metadata={},
        ),
        ArtifactFileData(
            file_path="subdir/other.mp3",
            file_hash=
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            type="test",
            metadata={},
        ),
    ]
    # Register the artifact attribute on the run, then seed the fake backend:
    # the container entry maps the attribute path to the artifact hash, and the
    # artifacts store maps (project, hash) to the file listing above.
    self.exp.set_attribute(self.path_str, Artifact(self.exp, self.path))
    self.exp._backend._containers[(self.exp._id, ContainerType.RUN)].set(
        self.path, ArtifactAttr(self.artifact_hash))
    self.exp._backend._artifacts[self.exp._project_id,
                                 self.artifact_hash] = self.artifact_files
    self._downloads = set()

    class TestArtifactDriver(ArtifactDriver):
        """Minimal ArtifactDriver stub: a "download" just creates an empty file."""

        @classmethod
        def get_type(cls):
            return "test"

        @classmethod
        def matches(cls, path: str) -> bool:
            # Never auto-selected by path matching; tests use it explicitly.
            return False

        @classmethod
        def get_tracked_files(cls, path, destination=None):
            return []

        @classmethod
        def download_file(cls, destination: pathlib.Path,
                          file_definition: ArtifactFileData):
            # Simulate a download by touching the destination path.
            destination.touch()

    self.test_artifact_driver = TestArtifactDriver
def create_artifact_version(
    swagger_client: SwaggerClient,
    project_id: str,
    artifact_hash: str,
    parent_identifier: str,
    files: List[ArtifactFileData],
    default_request_params: Dict,
) -> ArtifactModel:
    """Create a new artifact version on the backend and return its model.

    Args:
        swagger_client: Swagger client exposing ``api.createArtifactVersion``.
        project_id: Identifier of the project owning the artifact.
        artifact_hash: Hash of the new artifact version.
        parent_identifier: Identifier of the parent artifact version.
        files: File entries to attach; serialized via ``ArtifactFileData.to_dto``.
        default_request_params: Extra request kwargs merged into the call.

    Returns:
        ArtifactModel built from the backend response.

    Raises:
        ArtifactNotFoundException: if the backend responds with HTTP 404.
    """
    params = {
        "projectIdentifier": project_id,
        "hash": artifact_hash,
        "parentIdentifier": parent_identifier,
        "artifactFilesDTO": {
            "files": [ArtifactFileData.to_dto(a) for a in files]
        },
        **default_request_params,
    }
    # Keep the try body narrowed to the call that can actually raise
    # HTTPNotFound; chain the cause so the original HTTP error stays visible.
    try:
        result = swagger_client.api.createArtifactVersion(
            **params).response().result
    except HTTPNotFound as e:
        raise ArtifactNotFoundException(artifact_hash) from e
    return ArtifactModel(
        hash=result.artifactHash,
        size=result.size,
        received_metadata=result.receivedMetadata,
    )
@classmethod
def get_tracked_files(
        cls, path: str,
        destination: str = None) -> typing.List[ArtifactFileData]:
    """List the S3 objects under *path* as artifact file entries.

    Args:
        path: ``s3://bucket/prefix`` location. Wildcards are rejected.
        destination: Optional prefix prepended to each file's relative path.

    Returns:
        One ArtifactFileData per stored object, hashed by its S3 ETag.

    Raises:
        NeptuneUnsupportedArtifactFunctionalityException: for wildcard URIs.
        NeptuneRemoteStorageCredentialsException: when AWS credentials are missing.
        NeptuneRemoteStorageAccessException: when the bucket or key does not exist.
    """
    url = urlparse(path)
    bucket_name, prefix = url.netloc, url.path.lstrip("/")

    if "*" in prefix:
        raise NeptuneUnsupportedArtifactFunctionalityException(
            f"Wildcard characters (*,?) in location URI ({path}) are not supported."
        )

    # pylint: disable=no-member
    remote_storage = boto3.resource("s3").Bucket(bucket_name)

    # Loop-invariant: resolve the destination prefix once, not per object.
    destination_path = pathlib.PurePosixPath(destination or "")

    stored_files: typing.List[ArtifactFileData] = list()
    try:
        for remote_object in remote_storage.objects.filter(Prefix=prefix):
            # If the prefix points at a single file, compute relative paths
            # against its parent directory instead.
            if prefix == remote_object.key:
                prefix = str(pathlib.PurePosixPath(prefix).parent)
            remote_key = remote_object.key
            relative_file_path = remote_key[len(prefix.lstrip(".")
                                               ):].lstrip("/")
            file_path = destination_path / relative_file_path
            stored_files.append(
                ArtifactFileData(
                    file_path=str(file_path).lstrip("/"),
                    # S3 ETags come wrapped in literal double quotes.
                    file_hash=remote_object.e_tag.strip('"'),
                    type=ArtifactFileType.S3.value,
                    size=remote_object.size,
                    metadata=cls._serialize_metadata({
                        "location":
                            f's3://{bucket_name}/{remote_key.lstrip("/")}',
                        "last_modified": remote_object.last_modified,
                    }),
                ))
    except NoCredentialsError:
        raise NeptuneRemoteStorageCredentialsException()
    except (
            remote_storage.meta.client.exceptions.NoSuchBucket,
            remote_storage.meta.client.exceptions.NoSuchKey,
    ):
        raise NeptuneRemoteStorageAccessException(location=path)

    return stored_files
def test_non_existing_file_download(self):
    missing_path = "/wrong/path"
    file_definition = ArtifactFileData(
        file_path=missing_path,
        file_hash="??",
        type="??",
        metadata={"file_path": missing_path},
    )
    # Downloading a file whose source does not exist must fail loudly.
    with tempfile.TemporaryDirectory() as tmp_dir:
        with self.assertRaises(NeptuneLocalStorageAccessException):
            LocalArtifactDriver.download_file(
                destination=Path(tmp_dir),
                file_definition=file_definition,
            )
def setUp(self) -> None:
    """Common fixtures: an artifact hash, a one-file listing, random identifiers."""
    # Same 64-hex value serves as both the artifact hash and the file hash.
    placeholder_hash = (
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    )
    self.artifact_hash = placeholder_hash
    self.files = [
        ArtifactFileData("fname.txt", placeholder_hash, "test", {}),
    ]
    self.project_id = str(uuid.uuid4())
    self.parent_identifier = str(uuid.uuid4())
def list_artifact_files(
    self, project_id: str, artifact_hash: str
) -> List[ArtifactFileData]:
    """Fetch the file listing of an artifact from the backend.

    Args:
        project_id: Identifier of the project owning the artifact.
        artifact_hash: Hash identifying the artifact version.

    Returns:
        The artifact's files, deserialized via ``ArtifactFileData.from_dto``.

    Raises:
        ArtifactNotFoundException: if the backend responds with HTTP 404.
    """
    params = {
        "projectIdentifier": project_id,
        "hash": artifact_hash,
        **DEFAULT_REQUEST_KWARGS,
    }
    # Keep the try body narrowed to the call that can raise HTTPNotFound;
    # chain the cause so the original HTTP error stays in the traceback.
    try:
        result = (
            self.artifacts_client.api.listArtifactFiles(**params).response().result
        )
    except HTTPNotFound as e:
        raise ArtifactNotFoundException(artifact_hash) from e
    return [ArtifactFileData.from_dto(a) for a in result.files]
def test_file_download(self):
    # For a single-part S3 upload the ETag equals the object's MD5.
    expected_md5 = "2f249230a8e7c2bf6005ccd2679259ec"
    file_definition = ArtifactFileData(
        file_path="to/file1",
        file_hash=expected_md5,
        type=ArtifactFileType.S3.value,
        metadata={"location": f"s3://{self.bucket_name}/path/to/file1"},
    )
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = Path(tmp_dir) / "target.txt"
        S3ArtifactDriver.download_file(
            destination=target, file_definition=file_definition
        )
        self.assertEqual(expected_md5, md5(target))
@classmethod
def get_tracked_files(
        cls, path: str,
        destination: str = None) -> typing.List[ArtifactFileData]:
    """List local files under *path* as artifact file entries.

    Args:
        path: Local path, optionally prefixed with ``file://``.
            Wildcards are rejected.
        destination: Optional prefix prepended to each file's relative path.

    Returns:
        One ArtifactFileData per regular file, hashed via FileHasher.

    Raises:
        NeptuneUnsupportedArtifactFunctionalityException: for wildcard URIs.
    """
    file_protocol_prefix = "file://"
    if path.startswith(file_protocol_prefix):
        path = path[len(file_protocol_prefix):]

    if "*" in path:
        raise NeptuneUnsupportedArtifactFunctionalityException(
            f"Wildcard characters (*,?) in location URI ({path}) are not supported."
        )

    source_location = pathlib.Path(path).expanduser()
    # Loop-invariant: decide once whether we walk a directory or a single file.
    source_is_dir = source_location.is_dir()
    files_to_check = (source_location.rglob("*")
                      if source_is_dir else [source_location])

    stored_files: typing.List[ArtifactFileData] = list()
    for file in files_to_check:
        # symlink dirs are omitted by rglob('*')
        if not file.is_file():
            continue

        if source_is_dir:
            file_path = file.relative_to(source_location).as_posix()
        else:
            file_path = file.name
        if destination is not None:
            file_path = (pathlib.Path(destination) / file_path).as_posix()

        # One stat() call per file instead of two (size and mtime).
        stat_result = file.stat()
        stored_files.append(
            ArtifactFileData(
                file_path=file_path,
                file_hash=FileHasher.get_local_file_hash(file),
                type=ArtifactFileType.LOCAL.value,
                size=stat_result.st_size,
                metadata=cls._serialize_metadata({
                    "file_path": f"file://{file.resolve().as_posix()}",
                    "last_modified": stat_result.st_mtime,
                }),
            ))
    return stored_files
def test_file_download(self):
    source_path = (self.test_dir / "data/file1.txt").as_posix()
    file_definition = ArtifactFileData(
        file_path="data/file1.txt",
        file_hash="??",
        type="??",
        metadata={"file_path": f"file://{source_path}"},
    )
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = Path(tmp_dir) / "downloaded_file.ext"
        LocalArtifactDriver.download_file(
            destination=target, file_definition=file_definition
        )
        # A local "download" is a symlink to the source, so content matches.
        self.assertTrue(target.is_symlink())
        self.assertEqual("ad62f265e5b1a2dc51f531e44e748aa0", md5(target))