async def test_bulk_download_multi_container(random_blob_list):
    """Bulk-download every blob's file URL and verify each downloaded
    file exists and matches the source blob's checksum."""
    indexed_blobs = dict(enumerate(random_blob_list))
    urls = {idx: blob.file_url for idx, blob in indexed_blobs.items()}

    downloaded = await bulk_download(urls)

    assert len(downloaded) == len(indexed_blobs)
    for idx, blob in indexed_blobs.items():
        local_path = downloaded[idx]
        assert os.path.isfile(local_path)
        digest = file_checksum(local_path, hash_type=blob.driver.hash_type)
        assert digest.hexdigest() == blob.checksum
async def test_bulk_upload(container):
    """Bulk-upload in-memory binary streams and verify each resulting
    blob's name prefix and checksum."""
    stream_count = 10
    streams = binary_iostreams(stream_count)
    target = random_container_name()

    uploaded = await bulk_upload(streams, destpath='/' + target)

    assert isinstance(uploaded, dict)
    assert len(uploaded) == stream_count
    algo = container.driver.hash_type
    for key, url in uploaded.items():
        # Rewind each stream so its checksum can be recomputed locally.
        streams[key].seek(0)
        expected = file_checksum(streams[key], hash_type=algo)
        blob = await container.get_blob(url)
        assert blob.name.startswith(target)
        assert blob.checksum == expected.hexdigest()
async def test_bulk_upload_with_one_invalid(container):
    """Bulk-upload with one empty (invalid) stream: the bad entry maps to
    an Exception while all valid entries upload and verify normally."""
    stream_count = 10
    streams = binary_iostreams(stream_count)
    # Inject one invalid (empty) stream under an extra key.
    streams[stream_count + 1] = io.BytesIO(b'')
    target = random_container_name()

    uploaded = await bulk_upload(streams, destpath='/' + target)

    assert isinstance(uploaded, dict)
    assert len(uploaded) == stream_count + 1
    algo = container.driver.hash_type
    for key, url in uploaded.items():
        if key == (stream_count + 1):
            # The invalid stream should surface as an Exception entry.
            assert isinstance(url, Exception)
            continue
        streams[key].seek(0)
        expected = file_checksum(streams[key], hash_type=algo)
        blob = await container.get_blob(url)
        assert blob.name.startswith(target)
        assert blob.checksum == expected.hexdigest()
async def test_download_without_destination(binary_blob):
    """download() with no destination yields something file_checksum can
    hash, and the digest matches the source blob."""
    result = await download(binary_blob.file_url)
    digest = file_checksum(result, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == binary_blob.checksum
async def test_download_stream(binary_blob, temp_file):
    """download() into an already-open binary stream writes bytes whose
    checksum matches the source blob."""
    with open(temp_file, 'wb') as sink:
        await download(binary_blob.file_url, sink)
    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == binary_blob.checksum
async def test_download_file_path(binary_blob, temp_file):
    """download() with destfilename returns a path whose contents match
    the source blob's checksum."""
    saved_path = await download(binary_blob.file_url, destfilename=temp_file)
    digest = file_checksum(saved_path, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == binary_blob.checksum
def test_file_checksum_stream(binary_stream):
    """file_checksum() hashes a stream correctly and leaves the stream
    rewound to position 0."""
    digest = file_checksum(binary_stream, hash_type='md5', block_size=32)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
    # The helper must restore the stream position after hashing.
    assert binary_stream.tell() == 0
def test_file_checksum_filename(text_filename):
    """file_checksum() accepts a filename and produces the expected MD5."""
    digest = file_checksum(text_filename, hash_type='md5', block_size=32)
    assert digest.hexdigest() == TEXT_MD5_CHECKSUM
async def test_blob_download_stream(binary_blob, temp_file):
    """Blob.download() into an open binary stream writes the expected
    bytes (verified against the known MD5 constant)."""
    with open(temp_file, 'wb') as sink:
        await binary_blob.download(sink)
    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
async def test_blob_download_path(binary_blob, temp_file):
    """Blob.download() given a filesystem path writes the expected bytes
    (verified against the known MD5 constant)."""
    await binary_blob.download(temp_file)
    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def _make_blob(self, container: Container, object_name: str) -> Blob:
    """Build a :class:`.Blob` describing a file on the local filesystem.

    Reads the file's stat info, any extended attributes (metadata,
    content type/disposition, cache control), and computes its checksum.

    :param container: Container instance the blob belongs to.
    :type container: :class:`.Container`

    :param object_name: Filename.
    :type object_name: str

    :return: Blob instance.
    :rtype: :class:`.Blob`

    :raises NotFoundError: If the file does not exist.
    """
    path = os.path.join(self.base_path, container.name, object_name)
    try:
        file_stat = os.stat(str(pathlib.Path(path)))
    except FileNotFoundError:
        raise NotFoundError(messages.BLOB_NOT_FOUND % (object_name, container.name))

    meta_data = {}
    content_type = content_disposition = cache_control = None

    try:
        for attr_key, raw_value in xattr.xattr(path).items():
            try:
                decoded = raw_value.decode('utf-8')
            except UnicodeDecodeError:
                # Non-UTF-8 attribute values are recorded as None.
                decoded = None
            if attr_key.startswith(self._OBJECT_META_PREFIX + 'metadata'):
                # Last dotted component is the user metadata key.
                meta_data[attr_key.split('.')[-1]] = decoded
            elif attr_key.endswith('content_type'):
                content_type = decoded
            elif attr_key.endswith('content_disposition'):
                content_disposition = decoded
            elif attr_key.endswith('cache_control'):
                cache_control = decoded
            else:
                logger.warning("Unknown file attribute '%s'", attr_key)
    except OSError:
        # Filesystem does not support (or cannot read) extended attributes.
        logger.warning(messages.LOCAL_NO_ATTRIBUTES)

    # TODO: QUESTION: Option to disable checksum for large files?
    # TODO: QUESTION: Save a .hash file for each file?
    checksum = file_checksum(path, hash_type=self.hash_type).hexdigest()
    # ETag is derived from the path, not the contents.
    etag = hashlib.sha1(path.encode('utf-8')).hexdigest()

    return Blob(name=object_name,
                checksum=checksum,
                etag=etag,
                size=file_stat.st_size,
                container=container,
                driver=self,
                acl=None,
                meta_data=meta_data,
                content_disposition=content_disposition,
                content_type=content_type,
                cache_control=cache_control,
                created_at=datetime.fromtimestamp(file_stat.st_ctime, timezone.utc),
                modified_at=datetime.fromtimestamp(file_stat.st_mtime, timezone.utc))