def test_list_non_empty(self, client):
    """Listing a populated path yields one prefix and two (name, size) blobs."""
    root = "/path/"

    # Fake listing: one "directory" prefix followed by two blobs.
    prefix_entry = BlobPrefix()
    prefix_entry.name = root + "dir"

    first_blob = BlobProperties()
    first_blob.size = 42
    first_blob.name = root + "file1"

    second_blob = BlobProperties()
    second_blob.size = 0
    second_blob.name = root + "file2"

    listing = MockBlobList([prefix_entry, first_blob, second_blob])
    client.get_container_client().walk_blobs.return_value = listing

    service = AzureBlobStoreService()
    service.set_connection(connection=client)

    results = service.list(key=self.wasbs_base + root)

    # Counts first, then the individual entries in listing order.
    assert len(results["blobs"]) == 2
    assert len(results["prefixes"]) == 1

    assert results["prefixes"][0] == "dir"

    first_listed = results["blobs"][0]
    assert first_listed[0] == "file1"
    assert first_listed[1] == 42

    second_listed = results["blobs"][1]
    assert second_listed[0] == "file2"
    assert second_listed[1] == 0
def test_download_dir_with_basename(self, client):
    """Download a mocked blob "directory" with ``use_basename=True``.

    The mocked listing for ``/path/to/foo/`` exposes two files and one
    sub-directory prefix; every other prefix is answered with a single nested
    file.  Each download call recorded on the container client must name one
    of those three blobs, and the corresponding file is expected to exist
    under ``<local_path>/foo/``.
    """
    dirname1 = tempfile.mkdtemp()
    dirname2 = tempfile.mkdtemp(prefix=dirname1 + "/")

    def mkfile(fname):
        # Only the file's existence matters; close the handle immediately so
        # the test does not leak an open file object.
        with open(fname, "w"):
            pass

    client.return_value.get_blob_to_path.side_effect = mkfile

    store = AzureBlobStoreService()
    store.set_connection(connection=client)

    blob_path = "/path/to/"
    azure_url = self.wasbs_base + blob_path
    rel_path2 = dirname2.split("/")[-1]

    # Mock return list
    obj_mock1 = BlobProperties()
    obj_mock1.size = 42
    obj_mock1.name = blob_path + "foo/test1.txt"

    obj_mock2 = BlobProperties()
    obj_mock2.size = 42
    obj_mock2.name = blob_path + "foo/test2.txt"

    subdir_mock = BlobPrefix()
    subdir_mock.name = blob_path + "foo/" + rel_path2

    obj_mock3 = BlobProperties()
    obj_mock3.size = 42
    obj_mock3.name = blob_path + "foo/" + rel_path2 + "/" + "test3.txt"

    # Create some files to return
    def list_side_effect(container_name, prefix, delimiter="/", marker=None):
        # The "foo/" listing holds the sub-directory and two files; any other
        # prefix is answered with the nested file.
        if prefix == blob_path + "foo/":
            return MockBlobList([subdir_mock, obj_mock1, obj_mock2])
        return MockBlobList([obj_mock3])

    client.return_value.walk_blobs.side_effect = list_side_effect

    dirname3 = tempfile.mkdtemp()

    # Test with basename: the expected local paths below include "foo".
    store.download_dir(
        blob=azure_url + "foo", local_path=dirname3, use_basename=True
    )

    # Blob name -> local file that must exist once that blob was downloaded.
    expected_files = (
        (
            "{}foo/test1.txt".format(blob_path),
            "{}/foo/test1.txt".format(dirname3),
        ),
        (
            "{}foo/test2.txt".format(blob_path),
            "{}/foo/test2.txt".format(dirname3),
        ),
        (
            "{}foo/{}/test3.txt".format(blob_path, rel_path2),
            "{}/foo/{}/test3.txt".format(dirname3, rel_path2),
        ),
    )

    call_args_list = client.get_container_client().download_blob.call_args_list
    for call_args in call_args_list:
        # A recorded call is an (args, kwargs) pair.  Compare the blob name
        # itself, not the whole positional tuple (which never equals a str).
        # NOTE(review): the "blob" keyword fallback assumes the Azure
        # ``ContainerClient.download_blob(blob, ...)`` signature - confirm.
        positional, keywords = call_args[0], call_args[1]
        blob_name = positional[0] if positional else keywords.get("blob")

        for expected_blob, local_file in expected_files:
            if blob_name == expected_blob:
                assert os.path.exists(local_file)
                break
        else:
            raise AssertionError(
                "Unexpected blob download: {!r}".format(blob_name)
            )
def test_download_directory_artifact_succeeds_when_artifact_root_is_blob_container_root(
    mock_client, tmpdir
):
    """Downloading from the container root copies both nested files into `tmpdir`."""
    repo = AzureBlobArtifactRepository(TEST_BLOB_CONTAINER_ROOT, mock_client)

    file_path_1 = "file_1"
    file_path_2 = "file_2"
    subdir_path = "my_directory"

    # One directory prefix at the root, holding two blobs.
    dir_prefix = BlobPrefix()
    dir_prefix.name = subdir_path

    blob_props_1 = BlobProperties()
    blob_props_1.size = 42
    blob_props_1.name = posixpath.join(subdir_path, file_path_1)

    blob_props_2 = BlobProperties()
    blob_props_2.size = 42
    blob_props_2.name = posixpath.join(subdir_path, file_path_2)

    def get_mock_listing(*args, **kwargs):
        """
        Return a listing keyed on the requested ``name_starts_with`` prefix: the
        directory prefix for the root, the two blobs for the sub-directory, and
        an empty listing otherwise. Answering per prefix keeps the recursive
        directory traversal from seeing the same artifacts at every level.
        """
        # pylint: disable=unused-argument
        requested = posixpath.abspath(kwargs["name_starts_with"])
        if requested == "/":
            return MockBlobList([dir_prefix])
        if requested == posixpath.abspath(subdir_path):
            return MockBlobList([blob_props_1, blob_props_2])
        return MockBlobList([])

    def create_file(buffer):
        # Stand in for the download: write a file named after the target buffer.
        target = tmpdir.join(os.path.basename(buffer.name))
        target.write("hello world!")

    mock_client.get_container_client().walk_blobs.side_effect = get_mock_listing
    mock_client.get_container_client().download_blob().readinto.side_effect = create_file

    # Ensure that the root directory can be downloaded successfully
    repo.download_artifacts("")

    # Ensure that the `create_file` side effect placed every downloaded artifact in `tmpdir`
    downloaded = os.listdir(tmpdir.strpath)
    assert file_path_1 in downloaded
    assert file_path_2 in downloaded
def test_download_artifact_throws_value_error_when_listed_blobs_do_not_contain_artifact_root_prefix(
    mock_client,
):
    """Downloading raises when a listed blob name lacks the artifact-root prefix."""
    repo = AzureBlobArtifactRepository(TEST_URI, mock_client)

    # A "bad blob": its name is not prefixed by the artifact store's root path.
    stray_blob = BlobProperties()
    stray_blob.size = 42
    stray_blob.name = "file_path"

    def get_mock_listing(*args, **kwargs):
        """
        Return the bad blob only when the requested ``name_starts_with`` prefix
        is the artifact root; every other prefix gets an empty listing, so the
        directory traversal does not re-list the same blob at each level.
        """
        # pylint: disable=unused-argument
        requested = posixpath.abspath(kwargs["name_starts_with"])
        at_artifact_root = requested == posixpath.abspath(TEST_ROOT_PATH)
        # The un-prefixed blob at the root is what should trigger the exception.
        return MockBlobList([stray_blob] if at_artifact_root else [])

    mock_client.get_container_client().walk_blobs.side_effect = get_mock_listing

    expected_message = "Azure blob does not begin with the specified artifact path"
    with pytest.raises(MlflowException, match=expected_message):
        repo.download_artifacts("")
def test_list_artifacts(mock_client, root_path):
    """A listing of one prefix and one blob maps to a directory and a file entry."""
    artifact_uri = posixpath.join(TEST_BLOB_CONTAINER_ROOT, root_path)
    repo = AzureBlobArtifactRepository(artifact_uri, mock_client)

    # Fake listing content: a directory prefix and a 42-byte blob.
    dir_entry = BlobPrefix()
    dir_entry.name = posixpath.join(TEST_ROOT_PATH, "dir")

    file_entry = BlobProperties()
    file_entry.size = 42
    file_entry.name = posixpath.join(TEST_ROOT_PATH, "file")

    listing = MockBlobList([dir_entry, file_entry])
    mock_client.get_container_client().walk_blobs.return_value = listing

    artifacts = repo.list_artifacts()

    # The listing must have been requested for the root path with a trailing slash.
    mock_client.get_container_client().walk_blobs.assert_called_with(
        name_starts_with="some/path/"
    )

    listed_dir = artifacts[0]
    assert listed_dir.path == "dir"
    assert listed_dir.is_dir is True
    assert listed_dir.file_size is None

    listed_file = artifacts[1]
    assert listed_file.path == "file"
    assert listed_file.is_dir is False
    assert listed_file.file_size == 42
def test_list_artifacts_single_file(mock_client):
    """Listing the path "file" returns an empty list when only that blob is listed."""
    repo = AzureBlobArtifactRepository(TEST_URI, mock_client)

    # The mocked listing contains exactly one blob, named <TEST_ROOT_PATH>/file.
    single_blob = BlobProperties()
    single_blob.name = posixpath.join(TEST_ROOT_PATH, "file")
    listing = MockBlobList([single_blob])
    mock_client.get_container_client().walk_blobs.return_value = listing

    listed = repo.list_artifacts("file")
    assert listed == []
def get_blob_properties(filename):
    """Build the ``BlobProperties`` describing *filename*.

    The content type is chosen from the trailing file extension, compared
    case-insensitively: ``.json`` -> ``application/json``, ``.html`` ->
    ``text/html``, anything else -> ``text/plain``.  The content disposition
    is always ``inline; filename=<filename>``.

    Args:
        filename: Blob name; also used verbatim in the content disposition.

    Returns:
        A ``BlobProperties`` whose ``name`` is *filename* and whose
        ``content_settings`` carries the content type and disposition.
    """
    logger = logging.getLogger(__name__)
    props = BlobProperties()
    content = ContentSettings()
    logger.debug('Settings props for file %s', filename)
    props.name = filename

    # Match the trailing extension only.  A substring test would label names
    # such as "report.json.html" or "data.jsonl" as JSON.
    lowered = filename.lower()
    if lowered.endswith('.json'):
        content.content_type = 'application/json'
    elif lowered.endswith('.html'):
        content.content_type = 'text/html'
    else:
        content.content_type = 'text/plain'
    logger.debug('Content-type set to %s', content.content_type)

    # NOTE(review): the filename is inserted unquoted; names containing
    # spaces or ';' may need quoting in the header - confirm with consumers.
    content.content_disposition = f'inline; filename={filename}'
    props.content_settings = content
    return props