def test_blob_download_stream(binary_blob, temp_file):
    """Downloading into an open binary stream preserves the blob content."""
    with open(temp_file, 'wb') as download_file:
        binary_blob.download(download_file)

    # Verify the round trip by hashing the downloaded file with the
    # driver's configured hash algorithm.
    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def _make_blob(self, container: Container, object_name: str) -> Blob:
    """Convert local file name to a Cloud Storage Blob.

    :param container: Container instance.
    :type container: :class:`.Container`

    :param object_name: Filename.
    :type object_name: str

    :return: Blob instance.
    :rtype: :class:`.Blob`
    """
    full_path = os.path.join(self.base_path, container.name, object_name)
    object_path = pathlib.Path(full_path)

    try:
        stat = os.stat(str(object_path))
    except FileNotFoundError:
        raise NotFoundError(blob_not_found % (object_name, container.name))

    meta_data = {}
    content_type = None
    content_disposition = None

    # Extended attributes carry the metadata saved at upload time; a file
    # system without xattr support raises OSError and we fall back to none.
    try:
        attributes = xattr.xattr(full_path)
        for attr_key, attr_value in attributes.items():
            decoded = attr_value.decode('utf-8')
            if attr_key.startswith(self._OBJECT_META_PREFIX + 'metadata'):
                meta_data[attr_key.split('.')[-1]] = decoded
            elif attr_key.endswith('content-type'):
                content_type = decoded
            elif attr_key.endswith('content-disposition'):
                content_disposition = decoded
            else:
                logger.warning("Unknown file attribute '%s'" % attr_key)
    except OSError:
        logger.warning(local_no_attributes)

    # TODO: QUESTION: Option to disable checksum for large files?
    # TODO: QUESTION: Save a .hash file for each file?
    checksum = file_checksum(full_path, hash_type=self.hash_type)
    # The etag is derived from the path, not the content, so it is stable
    # across rewrites of the same file name.
    etag = hashlib.sha1(full_path.encode('utf-8')).hexdigest()

    created_at = datetime.fromtimestamp(stat.st_ctime, timezone.utc)
    modified_at = datetime.fromtimestamp(stat.st_mtime, timezone.utc)

    return Blob(
        name=object_path.name,
        checksum=checksum,
        etag=etag,
        size=stat.st_size,
        container=container,
        driver=self,
        acl=None,
        meta_data=meta_data,
        content_disposition=content_disposition,
        content_type=content_type,
        created_at=created_at,
        modified_at=modified_at,
    )
def upload_blob(
    self,
    container: Container,
    filename: FileLike,
    blob_name: str = None,
    acl: str = None,
    meta_data: MetaData = None,
    content_type: str = None,
    content_disposition: str = None,
    cache_control: str = None,
    chunk_size: int = 1024,
    extra: ExtraOptions = None,
) -> Blob:
    """Upload a path or file-like object as a blob to an Azure container.

    The blob is uploaded with Azure-side content validation enabled and the
    locally computed MD5 stored in its content settings.
    """
    # Azure blob ACLs are managed at the container level.
    if acl:
        logger.info(messages.OPTION_NOT_SUPPORTED, "acl")

    meta_data = meta_data if meta_data is not None else {}
    extra = {} if extra is None else extra

    extra_args = self._normalize_parameters(extra, self._PUT_OBJECT_KEYS)
    extra_args.setdefault("content_type", content_type)
    extra_args.setdefault("content_disposition", content_disposition)
    extra_args.setdefault("cache_control", cache_control)

    azure_container = self._get_azure_container(container.name)
    blob_name = blob_name or validate_file_or_path(filename)

    # azure does not set content_md5 on backend
    file_digest = file_checksum(filename, hash_type=self.hash_type).digest()
    checksum = base64.b64encode(file_digest).decode("utf-8").strip()
    extra_args.setdefault("content_md5", checksum)

    content_settings = ContentSettings(**extra_args)

    # Both upload paths share the same arguments; only the source differs.
    shared_kwargs = dict(
        container_name=azure_container.name,
        blob_name=blob_name,
        content_settings=content_settings,
        metadata=meta_data,
        validate_content=True,
    )
    if isinstance(filename, str):
        self.service.create_blob_from_path(file_path=filename, **shared_kwargs)
    else:
        self.service.create_blob_from_stream(stream=filename, **shared_kwargs)

    azure_blob = self._get_azure_blob(azure_container.name, blob_name)
    return self._convert_azure_blob(container, azure_blob)
def test_blob_upload_path(container, text_filename, temp_file):
    """Uploading by file path round-trips the content byte-for-byte."""
    blob = container.upload_blob(text_filename)
    assert blob.name == settings.TEXT_FILENAME

    blob.download(temp_file)
    digest = file_checksum(temp_file, hash_type=blob.driver.hash_type)
    assert digest.hexdigest() == settings.TEXT_MD5_CHECKSUM
def test_blob_upload_stream(container, binary_stream, temp_file):
    """Uploading from a stream round-trips the content byte-for-byte."""
    blob = container.upload_blob(
        filename=binary_stream,
        blob_name=BINARY_STREAM_FILENAME,
        **BINARY_OPTIONS,
    )
    assert blob.name == BINARY_STREAM_FILENAME

    blob.download(temp_file)
    digest = file_checksum(temp_file, hash_type=blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def test_blob_upload_options(container, binary_stream, temp_file):
    """Upload options (metadata, content type) are stored on the blob."""
    blob = container.upload_blob(
        filename=binary_stream,
        blob_name=BINARY_STREAM_FILENAME,
        **BINARY_OPTIONS,
    )
    assert blob.name == BINARY_STREAM_FILENAME
    assert blob.meta_data == BINARY_OPTIONS['meta_data']
    assert blob.content_type == BINARY_OPTIONS['content_type']

    # The options must not corrupt the payload itself.
    blob.download(temp_file)
    digest = file_checksum(temp_file, hash_type=blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def test_blob_generate_download_url(binary_blob, temp_file):
    """A presigned URL serves the blob with the requested disposition."""
    content_disposition = BINARY_OPTIONS.get('content_disposition')
    download_url = binary_blob.generate_download_url(
        content_disposition=content_disposition)
    assert uri_validator(download_url)

    response = requests.get(download_url)
    assert response.status_code == HTTPStatus.OK, response.text
    assert response.headers['content-disposition'] == content_disposition

    with open(temp_file, 'wb') as download_file:
        for chunk in response.iter_content(chunk_size=128):
            download_file.write(chunk)

    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def test_blob_generate_download_url(binary_blob, temp_file):
    """A presigned URL serves the blob with the requested disposition.

    Fix: the final assertion compared the hash *object* returned by
    ``file_checksum`` against the hex checksum string, which can never be
    equal — every sibling test calls ``.hexdigest()`` before comparing.
    """
    content_disposition = BINARY_OPTIONS.get('content_disposition')
    download_url = binary_blob.generate_download_url(
        content_disposition=content_disposition)
    assert uri_validator(download_url)

    response = requests.get(download_url)
    assert response.status_code == HTTPStatus.OK, response.text

    # Rackspace adds extra garbage to the header
    # 'attachment; filename=avatar-attachment.png;
    # filename*=UTF-8\\'\\'avatar-attachment.png'
    parsed_disposition = parse_headers(response.headers['content-disposition'])
    response_disposition = '{}; filename={}'.format(
        parsed_disposition.disposition, parsed_disposition.filename_unsafe)
    assert response_disposition == content_disposition

    with open(temp_file, 'wb') as f:
        for chunk in response.iter_content(chunk_size=128):
            f.write(chunk)

    hash_type = binary_blob.driver.hash_type
    download_hash = file_checksum(temp_file, hash_type=hash_type)
    # was: `download_hash == BINARY_MD5_CHECKSUM` (object vs. str)
    assert download_hash.hexdigest() == BINARY_MD5_CHECKSUM
def test_blob_generate_download_url(binary_blob, temp_file):
    """A presigned URL serves the blob with the requested disposition."""
    content_disposition = settings.BINARY_OPTIONS.get("content_disposition")
    download_url = binary_blob.generate_download_url(
        content_disposition=content_disposition)
    assert uri_validator(download_url)

    response = requests.get(download_url)
    assert response.status_code == HTTPStatus.OK, response.text

    # Rackspace adds extra garbage to the header
    # 'attachment; filename=avatar-attachment.png;
    # filename*=UTF-8\\'\\'avatar-attachment.png'
    disposition, params = parse_content_disposition(
        response.headers["content-disposition"])
    rebuilt = "{}; filename={}".format(disposition, params["filename"])
    assert rebuilt == content_disposition

    with open(temp_file, "wb") as download_file:
        for chunk in response.iter_content(chunk_size=128):
            download_file.write(chunk)

    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == settings.BINARY_MD5_CHECKSUM
def test_blob_download_path(binary_blob, temp_file):
    """Downloading to a file path preserves the blob content."""
    binary_blob.download(temp_file)
    digest = file_checksum(temp_file, hash_type=binary_blob.driver.hash_type)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
def test_file_checksum(text_filename):
    """``file_checksum`` yields a hash object whose hexdigest matches the file.

    Fix: the assertion compared the returned hash *object* directly to the
    hex checksum string; the sibling stream/filename tests establish that
    ``file_checksum`` returns a hashlib hash object, so ``.hexdigest()``
    must be called before comparing.
    """
    file_hash = file_checksum(text_filename, hash_type='md5', block_size=32)
    assert file_hash.hexdigest() == TEXT_MD5_CHECKSUM
def _make_blob(self, container: Container, object_name: str) -> Blob:
    """Convert local file name to a Cloud Storage Blob.

    :param container: Container instance.
    :type container: :class:`.Container`

    :param object_name: Filename.
    :type object_name: str

    :return: Blob instance.
    :rtype: :class:`.Blob`
    """
    full_path = os.path.join(self.base_path, container.name, object_name)
    if not self._check_path_accessible(full_path):
        raise NotFoundError(
            messages.BLOB_NOT_FOUND % (object_name, container.name))

    object_path = pathlib.Path(full_path)
    try:
        stat = os.stat(str(object_path))
    except FileNotFoundError:
        raise NotFoundError(
            messages.BLOB_NOT_FOUND % (object_name, container.name))

    meta_data = {}
    content_type = None
    content_disposition = None
    cache_control = None

    # Extended attributes carry the metadata saved at upload time; a file
    # system without xattr support raises OSError and we fall back to none.
    try:
        attributes = self._make_xattr(full_path)
        for attr_key, attr_value in attributes.items():
            value_str = None
            try:
                value_str = attr_value.decode("utf-8")
            except UnicodeDecodeError:
                # Non-text attribute values are kept as None.
                pass

            if attr_key.startswith(self._OBJECT_META_PREFIX + "metadata"):
                meta_data[attr_key.split(".")[-1]] = value_str
            elif attr_key.endswith("content_type"):
                content_type = value_str
            elif attr_key.endswith("content_disposition"):
                content_disposition = value_str
            elif attr_key.endswith("cache_control"):
                cache_control = value_str
            else:
                logger.warning("Unknown file attribute '%s'", attr_key)
    except OSError:
        logger.warning(messages.LOCAL_NO_ATTRIBUTES)

    # TODO: QUESTION: Option to disable checksum for large files?
    # TODO: QUESTION: Save a .hash file for each file?
    checksum = file_checksum(full_path, hash_type=self.hash_type).hexdigest()
    # The etag is derived from the path, not the content, so it is stable
    # across rewrites of the same file name.
    etag = hashlib.sha1(full_path.encode("utf-8")).hexdigest()

    return Blob(
        name=object_name,
        checksum=checksum,
        etag=etag,
        size=stat.st_size,
        container=container,
        driver=self,
        acl=None,
        meta_data=meta_data,
        content_disposition=content_disposition,
        content_type=content_type,
        cache_control=cache_control,
        created_at=datetime.fromtimestamp(stat.st_ctime, timezone.utc),
        modified_at=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
    )
def test_file_checksum_stream(binary_stream):
    """Hashing a stream matches the known digest and rewinds the stream."""
    digest = file_checksum(binary_stream, hash_type='md5', block_size=32)
    assert digest.hexdigest() == BINARY_MD5_CHECKSUM
    # The stream must be rewound so callers can still read it afterwards.
    assert binary_stream.tell() == 0
def test_file_checksum_filename(text_filename):
    """Hashing by file path matches the known digest."""
    digest = file_checksum(text_filename, hash_type='md5', block_size=32)
    assert digest.hexdigest() == TEXT_MD5_CHECKSUM