def initiate_multipart_upload(self, key_name, headers=None,
                              reduced_redundancy=False, metadata=None,
                              encrypt_key=False, policy=None,
                              calc_encrypt_md5=True):
    """
    Start a multipart upload operation.

    Note: After you initiate a multipart upload and upload one or more
    parts, you must either complete or abort the multipart upload in
    order to stop being charged for storage of the uploaded parts.
    Only after you complete or abort the multipart upload does Amazon S3
    free up the parts storage and stop charging you for it.
    """
    query_args = 'uploads'
    provider = self.connection.provider
    headers = headers or {}
    if policy:
        headers[provider.acl_header] = policy
    if reduced_redundancy:
        storage_class_header = provider.storage_class_header
        if storage_class_header:
            headers[storage_class_header] = 'REDUCED_REDUNDANCY'
        # TODO: what if the provider doesn't support reduced redundancy?
    if encrypt_key:
        headers[provider.server_side_encryption_header] = 'AES256'
    if metadata is None:
        metadata = {}
    headers = ks3.utils.merge_meta(headers, metadata, self.connection.provider)
    if self.connection.local_encrypt:
        # Client-side encryption: record the MD5 of the data key and the
        # first IV as object metadata so the parts can be decrypted later.
        crypts = Crypts(self.connection.key)
        crypts.calc_md5 = calc_encrypt_md5
        crypts.action_info = "init_multi"
        md5_generator = hashlib.md5()
        md5_generator.update(crypts.key)
        headers["x-kss-meta-key"] = base64.b64encode(
            md5_generator.hexdigest().encode()).decode()
        headers["x-kss-meta-iv"] = base64.b64encode(crypts.first_iv).decode()
    response = self.connection.make_request('POST', self.name, key_name,
                                            query_args=query_args,
                                            headers=headers)
    body = response.read()
    if response.status == 200:
        resp = MultiPartUpload(self)
        if self.connection.local_encrypt:
            resp.set_crypt_context(crypts)
        h = handler.XmlHandler(resp, self)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        xml.sax.parseString(body, h)
        return resp
    else:
        raise self.connection.provider.storage_response_error(
            response.status, response.reason, body)
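# Usage sketch (not part of the original source): a minimal multipart upload
# driven through initiate_multipart_upload(). The names `conn`, `get_bucket`,
# and the MultiPartUpload helpers (`upload_part_from_file`, `complete_upload`,
# `cancel_upload`) follow the boto-style API this SDK mirrors and should be
# treated as assumptions here.
#
#   bucket = conn.get_bucket('my-bucket')
#   mp = bucket.initiate_multipart_upload('big-object')
#   try:
#       with open('chunk-0001', 'rb') as fp:
#           mp.upload_part_from_file(fp, part_num=1)
#       mp.complete_upload()
#   except Exception:
#       # Abort so the already-uploaded parts stop accruing storage charges.
#       mp.cancel_upload()
#       raise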
def set_contents_from_file(self, fp, headers=None, replace=True,
                           cb=None, num_cb=10, policy=None, md5=None,
                           reduced_redundancy=False, query_args=None,
                           encrypt_key=False, size=None, rewind=False,
                           crypt_context=None, calc_encrypt_md5=True):
    """
    Store an object in S3 using the name of the Key object as the
    key in S3 and the contents of the file pointed to by 'fp' as the
    contents. The data is read from 'fp' from its current position
    until 'size' bytes have been read or EOF.

    :type fp: file
    :param fp: The file whose contents to upload.

    :type headers: dict
    :param headers: Additional HTTP headers that will be sent with
        the PUT request.

    :type replace: bool
    :param replace: If this parameter is False, the method will
        first check to see if an object exists in the bucket with
        the same key. If it does, it won't overwrite it. The default
        value is True, which will overwrite the object.

    :type cb: function
    :param cb: A callback function that will be called to report
        progress on the upload. The callback should accept two
        integer parameters: the first represents the number of
        bytes that have been successfully transmitted to S3 and the
        second represents the size of the object to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter, this parameter determines the granularity of the
        callback by defining the maximum number of times the
        callback will be called during the file transfer.

    :type policy: :class:`boto.s3.acl.CannedACLStrings`
    :param policy: A canned ACL policy that will be applied to the
        new key in S3.

    :type md5: tuple
    :param md5: A tuple containing the hexdigest version of the MD5
        checksum of the file as the first element and the
        Base64-encoded version of the plain checksum as the second
        element. This is the same format returned by the
        compute_md5 method.

    :type reduced_redundancy: bool
    :param reduced_redundancy: If True, this will set the storage
        class of the new Key to be REDUCED_REDUNDANCY. The Reduced
        Redundancy Storage (RRS) feature of S3 provides lower
        redundancy at lower storage cost.

    :type encrypt_key: bool
    :param encrypt_key: If True, the new copy of the object will be
        encrypted on the server side by S3 and will be stored in an
        encrypted form while at rest in S3.

    :type size: int
    :param size: (optional) The maximum number of bytes to read from
        the file pointer (fp). This is useful when uploading a file
        in multiple parts where you are splitting the file up into
        different ranges to be uploaded. If not specified, the
        default behaviour is to read all bytes from the file
        pointer. Fewer bytes may be available.

    :type rewind: bool
    :param rewind: (optional) If True, the file pointer (fp) will be
        rewound to the start before any bytes are read from it. The
        default behaviour is False, which reads from the current
        position of the file pointer (fp).

    :rtype: int
    :return: The number of bytes written to the key.
    """
    provider = self.bucket.connection.provider
    headers = headers or {}
    if policy:
        headers[provider.acl_header] = policy

    if encrypt_key:
        headers[provider.server_side_encryption_header] = 'AES256'

    if rewind:
        # caller requests reading from beginning of fp.
        fp.seek(0, os.SEEK_SET)
    else:
        # The following seek/tell/seek logic is intended
        # to detect applications using the older interface to
        # set_contents_from_file(), which automatically rewound the
        # file each time the Key was reused. This changed with commit
        # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
        # split into multiple parts and uploaded in parallel, and at
        # the time of that commit this check was added because otherwise
        # older programs would get a success status and upload an empty
        # object. Unfortunately, it's very inefficient for fp's implemented
        # by KeyFile (used, for example, by gsutil when copying between
        # providers). So, we skip the check for the KeyFile case.
        # TODO: At some point consider removing this seek/tell/seek
        # logic, after enough time has passed that it's unlikely any
        # programs remain that assume the older auto-rewind interface.
        if not isinstance(fp, KeyFile):
            spos = fp.tell()
            fp.seek(0, os.SEEK_END)
            if fp.tell() == spos:
                fp.seek(0, os.SEEK_SET)
                if fp.tell() != spos:
                    # Raise an exception as this is likely a programming
                    # error whereby there is data before the fp but nothing
                    # after it.
                    fp.seek(spos)
                    raise AttributeError('fp is at EOF. Use rewind option '
                                         'or seek() to data start.')
            # seek back to the correct position.
            fp.seek(spos)

    if reduced_redundancy:
        self.storage_class = 'REDUCED_REDUNDANCY'
        if provider.storage_class_header:
            headers[provider.storage_class_header] = self.storage_class
            # TODO - What if the provider doesn't support reduced redundancy?
            # What if different providers provide different classes?
    if hasattr(fp, 'name'):
        self.path = fp.name
    if self.bucket is not None:
        if not md5 and provider.supports_chunked_transfer():
            # defer md5 calculation to on the fly and
            # we don't know anything about size yet.
            chunked_transfer = True
            self.size = None
        else:
            chunked_transfer = False
            if isinstance(fp, KeyFile):
                # Avoid EOF seek for KeyFile case as it's very inefficient.
                key = fp.getkey()
                size = key.size - fp.tell()
                self.size = size
                # At present both GCS and S3 use MD5 for the etag for
                # non-multipart-uploaded objects. If the etag is 32 hex
                # chars use it as an MD5, to avoid having to read the file
                # twice while transferring.
                if re.match('^"[a-fA-F0-9]{32}"$', key.etag):
                    etag = key.etag.strip('"')
                    md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
            if not md5:
                # compute_md5() also sets self.size to the actual number
                # of bytes read while computing the md5.
                md5 = self.compute_md5(fp, size)
                # adjust size if required
                size = self.size
            elif size:
                self.size = size
            else:
                # If md5 is provided, we still need the size, so
                # calculate it based on the bytes to the end of content.
                spos = fp.tell()
                fp.seek(0, os.SEEK_END)
                self.size = fp.tell() - spos
                fp.seek(spos)
                size = self.size
            self.md5 = md5[0]
            self.base64md5 = md5[1]

        if self.name is None:
            self.name = self.md5
        if not replace:
            if self.bucket.lookup(self.name):
                return
        if self.bucket.connection.local_encrypt and self.size:
            if not crypt_context:
                crypt_context = Crypts(self.bucket.connection.key)
                crypt_context.action_info = "put"
                crypt_context.calc_md5 = calc_encrypt_md5
            return self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
                                  query_args=query_args,
                                  chunked_transfer=chunked_transfer,
                                  size=size, crypt_context=crypt_context)
        return self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
                              query_args=query_args,
                              chunked_transfer=chunked_transfer, size=size)
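# Usage sketch (not part of the original source): writing a local file to a
# key with progress reporting via set_contents_from_file(). The names `conn`,
# `get_bucket`, `new_key`, the callback signature, and the example file paths
# are assumptions based on the boto-style API this SDK mirrors.
#
#   def progress(transmitted, total):
#       print('%d of %d bytes sent' % (transmitted, total))
#
#   bucket = conn.get_bucket('my-bucket')
#   key = bucket.new_key('reports/2024.csv')
#   with open('/tmp/2024.csv', 'rb') as fp:
#       key.set_contents_from_file(fp, cb=progress, num_cb=10, rewind=True)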