def upload_object(self, file_path, container, object_name, extra=None,
                  verify_hash=True, headers=None):
    """
    Upload an object.

    Note: This will overwrite an existing object with the same name.
    """
    # Note: We don't use any of the base driver functions since Backblaze
    # API requires you to provide SHA1 hash upfront and the base methods
    # don't support that
    with open(file_path, 'rb') as fp:
        iterator = iter(fp)
        iterator = read_in_chunks(iterator=iterator)
        data = exhaust_iterator(iterator=iterator)

    obj = self._perform_upload(data=data, container=container,
                               object_name=object_name, extra=extra,
                               verify_hash=verify_hash, headers=headers)

    return obj
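# The upload above leans on two small helpers, read_in_chunks and
# exhaust_iterator, to buffer the whole file before hashing it. A minimal,
# illustrative sketch of helpers with similar semantics follows; the
# *_sketch names are hypothetical and the real helpers (in
# libcloud.utils.files) accept more options.
import io


def read_in_chunks_sketch(stream, chunk_size=8096):
    # Yield data from a file-like object in chunk_size pieces.
    while True:
        chunk = stream.read(chunk_size)
        if not chunk:
            break
        yield chunk


def exhaust_iterator_sketch(iterator):
    # Concatenate everything the iterator yields into one bytes object.
    return b''.join(iterator)


# Example: buffer a small in-memory "file" the same way upload_object does.
fp = io.BytesIO(b'hello world' * 3)
data = exhaust_iterator_sketch(read_in_chunks_sketch(fp, chunk_size=8))
assert data == b'hello world' * 3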
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
        }

        if len(chunk) > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)

        result = self.connection.request(path, method='PUT', data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join([k + '=' + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    :param obj: Object instance
    :type obj: :class:`Object`

    :param chunk_size: Optional chunk size (in bytes).
    :type chunk_size: ``int``

    :return: A stream of binary chunks of data.
    :rtype: ``object``
    """
    path = self.get_object_cdn_url(obj)
    with open(path, 'rb') as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    :param obj: Object instance
    :type obj: :class:`Object`

    :param chunk_size: Optional chunk size (in bytes).
    :type chunk_size: ``int``

    :return: A stream of binary chunks of data.
    :rtype: ``object``
    """
    path = self.get_object_cdn_url(obj)
    with open(path, "rb") as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    @param obj: Object instance
    @type obj: L{Object}

    @param chunk_size: Optional chunk size (in bytes).
    @type chunk_size: C{int}

    @rtype: C{object}
    """
    path = self.get_object_cdn_url(obj)
    with open(path, 'rb') as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
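# The streaming download variants above are normally consumed through the
# public driver API. A hedged usage sketch, assuming the local-storage
# backend; the storage root, container name, object name, and output path
# are placeholders, and the local driver needs its optional locking
# dependency installed.
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver

cls = get_driver(Provider.LOCAL)
driver = cls('/tmp/storage')  # hypothetical storage root

container = driver.get_container(container_name='backups')
obj = driver.get_object(container_name='backups',
                        object_name='dump.tar.gz')

# Stream the object to disk in 1 MiB chunks without buffering it in memory.
with open('/tmp/dump.tar.gz', 'wb') as out_fp:
    for chunk in driver.download_object_as_stream(obj,
                                                  chunk_size=1024 * 1024):
        out_fp.write(chunk)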
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None, headers=None):
    """
    Upload an object.

    Note: Backblaze does not yet support true streaming uploads, so this
    method buffers the entire object in memory and performs a regular
    upload internally.
    """
    iterator = read_in_chunks(iterator=iterator)
    data = exhaust_iterator(iterator=iterator)

    obj = self._perform_upload(data=data, container=container,
                               object_name=object_name, extra=extra,
                               headers=headers)

    return obj
def _upload_in_chunks(self, response, data, iterator, object_path,
                      blob_type, lease, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Blob
    Storage.

    :param response: Response object from the initial POST request
    :type response: :class:`RawResponse`

    :param data: Any data from the initial POST request
    :type data: ``str``

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param blob_type: The blob type being uploaded
    :type blob_type: ``str``

    :param lease: The lease object to be used for renewal
    :type lease: :class:`AzureBlobLease`

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (status, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    # Get the upload id from the response xml
    if response.status != httplib.CREATED:
        raise LibcloudError('Error initializing upload. Code: %d' %
                            (response.status), driver=self)

    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = {}

    lease.update_headers(headers)

    if blob_type == 'BlockBlob':
        params = {'comp': 'block'}
    else:
        params = {'comp': 'page'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(iterator, AZURE_CHUNK_SIZE):
        data = b(data)
        content_length = len(data)
        offset = bytes_transferred
        bytes_transferred += content_length

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = str(content_length)

        if blob_type == 'BlockBlob':
            # Block id can be any unique string that is base64 encoded
            # A 10 digit number can hold the max value of 50000 blocks
            # that are allowed for azure
            block_id = base64.b64encode(b('%10d' % (count)))
            block_id = block_id.decode('utf-8')
            params['blockid'] = block_id

            # Keep this data for a later commit
            chunks.append(block_id)
        else:
            headers['x-ms-page-write'] = 'update'
            headers['x-ms-range'] = 'bytes=%d-%d' % \
                (offset, (bytes_transferred - 1))

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    if blob_type == 'BlockBlob':
        self._commit_blocks(object_path, chunks, lease)

    # The Azure service does not return a hash immediately for
    # chunked uploads. It takes some time for the data to get synced
    response.headers['content-md5'] = None

    return (True, data_hash, bytes_transferred)
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # This provides an extra level of data check and is recommended
        # by amazon
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def _upload_in_chunks(self, stream, object_path, lease, meta_data,
                      content_type, object_name, file_path, verify_hash,
                      headers):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Storage.
    """
    data_hash = None
    if verify_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = headers or {}

    lease.update_headers(headers)

    params = {'comp': 'block'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(stream, AZURE_UPLOAD_CHUNK_SIZE,
                               fill_size=True):
        data = b(data)
        content_length = len(data)
        bytes_transferred += content_length

        if verify_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = str(content_length)

        # Block id can be any unique string that is base64 encoded
        # A 10 digit number can hold the max value of 50000 blocks
        # that are allowed for azure
        block_id = base64.b64encode(b('%10d' % (count)))
        block_id = block_id.decode('utf-8')
        params['blockid'] = block_id

        # Keep this data for a later commit
        chunks.append(block_id)

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if verify_hash:
        data_hash = base64.b64encode(b(data_hash.digest()))
        data_hash = data_hash.decode('utf-8')

    response = self._commit_blocks(object_path=object_path,
                                   chunks=chunks,
                                   lease=lease,
                                   meta_data=meta_data,
                                   content_type=content_type,
                                   data_hash=data_hash,
                                   object_name=object_name,
                                   file_path=file_path)

    # According to the Azure docs:
    # > This header refers to the content of the request, meaning, in this
    # > case, the list of blocks, and not the content of the blob itself.
    # However, the validation code assumes that the content-md5 in the
    # server response refers to the object so we must discard the value
    response.headers['content-md5'] = None

    return {
        'response': response,
        'data_hash': data_hash,
        'bytes_transferred': bytes_transferred,
    }
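# The block-id scheme used by the Azure uploaders above is easy to
# reproduce in isolation. A self-contained sketch; the constant and
# function names are illustrative, while the 50000-block limit and the
# space-padded 10 digit counter mirror the comments in the code.
import base64

MAX_AZURE_BLOCKS = 50000  # documented per-blob block limit


def make_block_id(count):
    # Block ids only need to be unique within the blob and base64 encoded;
    # a 10 digit counter comfortably covers 50000 blocks.
    return base64.b64encode(b'%10d' % count).decode('utf-8')


print(make_block_id(1))      # 'ICAgICAgICAgMQ=='
print(make_block_id(50000))  # base64 of b'     50000'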
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)
    method = 'PUT'

    if extra is not None:
        content_type = extra.get('content_type', None)
    else:
        content_type = None

    if not content_type:
        content_type, _ = guess_file_mime_type(object_name)

        if not content_type:
            raise AttributeError(
                'File content-type could not be guessed and' +
                ' no content_type value provided')

    try:
        self.connection.request(path + '?metadata/system')
    except AtmosError:
        e = sys.exc_info()[1]
        if e.code != 1003:
            raise
        method = 'POST'

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
            'Content-Type': content_type,
        }

        if len(chunk) > 0 and bytes_transferred > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)
            method = 'PUT'

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join([k + '=' + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def _upload_in_chunks(self, response, data, iterator, object_path,
                      blob_type, lease, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Blob
    Storage.

    :param response: Response object from the initial POST request
    :type response: :class:`RawResponse`

    :param data: Any data from the initial POST request
    :type data: ``str``

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param blob_type: The blob type being uploaded
    :type blob_type: ``str``

    :param lease: The lease object to be used for renewal
    :type lease: :class:`AzureBlobLease`

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (status, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    # Get the upload id from the response xml
    if response.status != httplib.CREATED:
        raise LibcloudError('Error initializing upload. Code: %d' %
                            (response.status), driver=self)

    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = {}

    lease.update_headers(headers)

    if blob_type == 'BlockBlob':
        params = {'comp': 'block'}
    else:
        params = {'comp': 'page'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(iterator, AZURE_CHUNK_SIZE):
        data = b(data)
        content_length = len(data)
        offset = bytes_transferred
        bytes_transferred += content_length

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = content_length

        if blob_type == 'BlockBlob':
            # Block id can be any unique string that is base64 encoded
            # A 10 digit number can hold the max value of 50000 blocks
            # that are allowed for azure
            block_id = base64.b64encode(b('%10d' % (count)))
            block_id = block_id.decode('utf-8')
            params['blockid'] = block_id

            # Keep this data for a later commit
            chunks.append(block_id)
        else:
            headers['x-ms-page-write'] = 'update'
            headers['x-ms-range'] = 'bytes=%d-%d' % \
                (offset, bytes_transferred - 1)

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    if blob_type == 'BlockBlob':
        self._commit_blocks(object_path, chunks, lease)

    # The Azure service does not return a hash immediately for
    # chunked uploads. It takes some time for the data to get synced
    response.headers['content-md5'] = None

    return (True, data_hash, bytes_transferred)
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ""

    path = self._namespace_path(container.name + "/" + object_name)
    method = "PUT"

    if extra is not None:
        content_type = extra.get("content_type", None)
    else:
        content_type = None

    if not content_type:
        content_type, _ = guess_file_mime_type(object_name)

        if not content_type:
            raise AttributeError("File content-type could not be guessed and"
                                 + " no content_type value provided")

    try:
        self.connection.request(path + "?metadata/system")
    except AtmosError:
        e = sys.exc_info()[1]
        if e.code != 1003:
            raise
        method = "POST"

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {"x-emc-meta": "md5=" + data_hash.hexdigest(),
                   "Content-Type": content_type}

        if len(chunk) > 0 and bytes_transferred > 0:
            headers["Range"] = "Bytes=%d-%d" % (bytes_transferred, end)
            method = "PUT"

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get("meta_data", {})
    meta_data["md5"] = data_hash
    user_meta = ", ".join([k + "=" + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + "?metadata/user", method="POST",
                            headers={"x-emc-meta": user_meta})
    result = self.connection.request(path + "?metadata/system")

    meta = self._emc_meta(result)
    extra = {"object_id": meta["objectid"], "meta_data": meta_data}

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def upload_object(self, file_path, container, object_name, extra=None,
                  verify_hash=True, headers=None):
    """
    Upload an object.

    Note: This will overwrite an existing object with the same name.
    """
    # Note: We don't use any of the base driver functions since Backblaze
    # API requires you to provide SHA1 hash upfront and the base methods
    # don't support that
    with open(file_path, 'rb') as fp:
        iterator = iter(fp)
        iterator = read_in_chunks(iterator=iterator)
        data = exhaust_iterator(iterator=iterator)

    extra = extra or {}
    content_type = extra.get('content_type', 'b2/x-auto')
    meta_data = extra.get('meta_data', {})

    # Note: Backblaze API doesn't support chunked encoding and we need to
    # provide Content-Length up front (this is done inside _upload_object)
    headers = headers or {}
    headers['X-Bz-File-Name'] = object_name
    headers['Content-Type'] = content_type

    sha1 = hashlib.sha1()
    sha1.update(b(data))
    headers['X-Bz-Content-Sha1'] = sha1.hexdigest()

    # Include optional meta-data (up to 10 items)
    for key, value in meta_data.items():
        # TODO: Encode / escape key
        headers['X-Bz-Info-%s' % (key)] = value

    upload_data = self.ex_get_upload_data(
        container_id=container.extra['id'])
    upload_token = upload_data['authorizationToken']
    parsed_url = urlparse.urlparse(upload_data['uploadUrl'])

    upload_host = parsed_url.netloc
    request_path = parsed_url.path

    response = self.connection.upload_request(action=request_path,
                                              headers=headers,
                                              upload_host=upload_host,
                                              auth_token=upload_token,
                                              data=data)

    if response.status == httplib.OK:
        obj = self._to_object(item=response.object, container=container)
        return obj
    else:
        body = response.response.read()
        raise LibcloudError('Upload failed. status_code=%s, body=%s' %
                            (response.status, body), driver=self)
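# For context, a hedged usage sketch of upload_object through the public
# libcloud driver API; the credentials, container name, and file path are
# placeholders.
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver

cls = get_driver(Provider.BACKBLAZE_B2)
driver = cls('application_key_id', 'application_key')  # placeholder creds

container = driver.get_container(container_name='my-bucket')

# The whole file is read into memory so its SHA1 can be sent upfront.
obj = driver.upload_object(file_path='/tmp/backup.tar.gz',
                           container=container,
                           object_name='backup.tar.gz',
                           extra={'content_type': 'application/gzip',
                                  'meta_data': {'source': 'nightly-job'}})
print(obj.name, obj.size)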
def _upload_object(self, object_name, content_type, upload_func,
                   upload_func_kwargs, request_path, request_method='PUT',
                   headers=None, file_path=None, iterator=None,
                   container=None):
    """
    Helper function for setting common request headers and calling the
    passed in callback which uploads an object.
    """
    headers = headers or {}

    if file_path and not os.path.exists(file_path):
        raise OSError('File %s does not exist' % (file_path))

    if iterator is not None and not hasattr(iterator, 'next') and not \
            hasattr(iterator, '__next__'):
        raise AttributeError('iterator object must implement next() ' +
                             'method.')

    if not content_type:
        if file_path:
            name = file_path
        else:
            name = object_name

        content_type, _ = guess_file_mime_type(name)

        if not content_type:
            if self.strict_mode:
                raise AttributeError('File content-type could not be '
                                     'guessed and no content_type value '
                                     'is provided')
            else:
                # Fallback to a content-type
                content_type = DEFAULT_CONTENT_TYPE

    file_size = None

    if iterator:
        if self.supports_chunked_encoding:
            headers['Transfer-Encoding'] = 'chunked'
            upload_func_kwargs['chunked'] = True
        else:
            # Chunked transfer encoding is not supported. Need to buffer
            # all the data in memory so we can determine file size.
            iterator = read_in_chunks(iterator=iterator)
            data = exhaust_iterator(iterator=iterator)

            file_size = len(data)
            upload_func_kwargs['data'] = data
    else:
        file_size = os.path.getsize(file_path)
        upload_func_kwargs['chunked'] = False

    if file_size is not None and 'Content-Length' not in headers:
        headers['Content-Length'] = file_size

    headers['Content-Type'] = content_type
    response = self.connection.request(request_path,
                                       method=request_method, data=None,
                                       headers=headers, raw=True,
                                       container=container)

    upload_func_kwargs['response'] = response
    success, data_hash, bytes_transferred = upload_func(
        **upload_func_kwargs)

    if not success:
        raise LibcloudError(
            value='Object upload failed, Perhaps a timeout?', driver=self)

    result_dict = {
        'response': response,
        'data_hash': data_hash,
        'bytes_transferred': bytes_transferred
    }

    return result_dict
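# The content-type fallback in _upload_object mirrors what the standard
# library's mimetypes module provides. A standalone sketch of the same
# decision; pick_content_type is an illustrative name and the fallback
# value matches libcloud's DEFAULT_CONTENT_TYPE (assumed to be
# 'application/octet-stream').
import mimetypes

DEFAULT_CONTENT_TYPE = 'application/octet-stream'


def pick_content_type(name, provided=None, strict_mode=False):
    # Prefer an explicitly provided type, then a guess based on the file
    # name, then either fail (strict mode) or fall back to a generic type.
    if provided:
        return provided
    guessed, _ = mimetypes.guess_type(name)
    if guessed:
        return guessed
    if strict_mode:
        raise AttributeError('File content-type could not be guessed and '
                             'no content_type value is provided')
    return DEFAULT_CONTENT_TYPE


print(pick_content_type('report.pdf'))    # application/pdf
print(pick_content_type('blob.unknown'))  # application/octet-stream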
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    @param iterator: The generator for fetching the upload data
    @type iterator: C{generator}

    @param object_path: The path of the object to which we are uploading
    @type object_path: C{str}

    @param upload_id: The upload id allocated for this multipart upload
    @type upload_id: C{str}

    @keyword calculate_hash: Indicates if we must calculate the data hash
    @type calculate_hash: C{bool}

    @return: A tuple of (chunk info, checksum, bytes transferred)
    @rtype: C{tuple}
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # This provides an extra level of data check and is recommended
        # by amazon
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True, container=None):
    """
    Uploads data from an iterator in fixed sized chunks to OSS

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :keyword container: the container object to upload object to
    :type container: :class:`Container`

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for OSS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # OSS will calculate hash of the uploaded data and
        # check this header.
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers,
                                       container=container)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None, headers=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    extra_headers = headers or {}
    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ""

    path = self._namespace_path(container.name + "/" + object_name)
    method = "PUT"

    if extra is not None:
        content_type = extra.get("content_type", None)
    else:
        content_type = None

    content_type = self._determine_content_type(content_type, object_name)

    try:
        self.connection.request(path + "?metadata/system")
    except AtmosError as e:
        if e.code != 1003:
            raise
        method = "POST"

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = dict(extra_headers)
        headers.update({
            "x-emc-meta": "md5=" + data_hash.hexdigest(),
            "Content-Type": content_type,
        })

        if len(chunk) > 0 and bytes_transferred > 0:
            headers["Range"] = "Bytes=%d-%d" % (bytes_transferred, end)
            method = "PUT"

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get("meta_data", {})
    meta_data["md5"] = data_hash
    user_meta = ", ".join(
        [k + "=" + str(v) for k, v in list(meta_data.items())])
    self.connection.request(path + "?metadata/user", method="POST",
                            headers={"x-emc-meta": user_meta})
    result = self.connection.request(path + "?metadata/system")

    meta = self._emc_meta(result)
    extra = {
        "object_id": meta["objectid"],
        "meta_data": meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def _upload_multipart_chunks(self, container, object_name, upload_id,
                             stream, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    :param container: The destination container
    :type container: :class:`Container`

    :param object_name: The name of the object which we are uploading
    :type object_name: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :param stream: The generator for fetching the upload data
    :type stream: ``generator``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    request_path = self._get_object_path(container, object_name)

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(stream, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # The Content-MD5 header provides an extra level of data check and
        # is recommended by amazon
        headers = {
            'Content-Length': len(data),
            'Content-MD5': chunk_hash,
        }

        params['partNumber'] = count

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag'].replace('"', '')

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
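# The per-chunk Content-MD5 value used by the S3 and OSS uploaders above is
# simply the base64-encoded raw MD5 digest of the chunk body (not the hex
# digest). A self-contained sketch; chunk_content_md5 is an illustrative
# name and hashlib.md5 stands in for the driver's _get_hash_function.
import base64
import hashlib


def chunk_content_md5(data):
    return base64.b64encode(hashlib.md5(data).digest()).decode('utf-8')


part = b'x' * 1024
print(chunk_content_md5(part))  # a 24-character base64 string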
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)
    method = 'PUT'

    if extra is not None:
        content_type = extra.get('content_type', None)
    else:
        content_type = None

    content_type = self._determine_content_type(content_type, object_name)

    try:
        self.connection.request(path + '?metadata/system')
    except AtmosError as e:
        if e.code != 1003:
            raise
        method = 'POST'

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
            'Content-Type': content_type,
        }

        if len(chunk) > 0 and bytes_transferred > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)
            method = 'PUT'

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join(
        [k + '=' + str(v) for k, v in list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)