def __call__(self):
    """Complete a pending multipart upload once every part has finished."""
    LOGGER.debug("Completing multipart upload for file: %s",
                 self.filename.src)
    # Block until the initiating task and all part tasks have reported in.
    upload_id = self._upload_context.wait_for_upload_id()
    parts = self._upload_context.wait_for_parts_to_finish()
    LOGGER.debug("Received upload id and parts list.")
    bucket, key = find_bucket_key(self.filename.dest)
    params = {'bucket': bucket,
              'key': key,
              'endpoint': self.filename.endpoint,
              'upload_id': upload_id,
              'multipart_upload': {'Parts': parts}}
    try:
        operate(self.filename.service, 'CompleteMultipartUpload', params)
    except Exception as e:
        LOGGER.debug("Error trying to complete multipart upload: %s",
                     e, exc_info=True)
        msg = print_operation(self.filename, failed=True,
                              dryrun=self.parameters['dryrun'])
        msg += '\n' + str(e)
        outcome = {'message': msg, 'error': True}
    else:
        LOGGER.debug("Multipart upload completed for: %s",
                     self.filename.src)
        msg = print_operation(self.filename, False,
                              self.parameters['dryrun'])
        outcome = {'message': msg, 'error': False}
    self._upload_context.announce_completed()
    self.result_queue.put(outcome)
def __call__(self):
    """Finish a multipart upload and report the outcome on the result queue."""
    LOGGER.debug("Completing multipart upload for file: %s",
                 self.filename.src)
    upload_id = self._upload_context.wait_for_upload_id()
    parts = self._upload_context.wait_for_parts_to_finish()
    LOGGER.debug("Received upload id and parts list.")
    bucket, key = find_bucket_key(self.filename.dest)
    params = {
        'bucket': bucket,
        'key': key,
        'endpoint': self.filename.endpoint,
        'upload_id': upload_id,
        'multipart_upload': {'Parts': parts},
    }
    had_error = False
    try:
        operate(self.filename.service, 'CompleteMultipartUpload', params)
    except Exception as e:
        LOGGER.debug("Error trying to complete multipart upload: %s",
                     e, exc_info=True)
        message = print_operation(self.filename, failed=True,
                                  dryrun=self.parameters['dryrun'])
        message += '\n' + str(e)
        had_error = True
    else:
        LOGGER.debug("Multipart upload completed for: %s",
                     self.filename.src)
        message = print_operation(self.filename, False,
                                  self.parameters['dryrun'])
    self._upload_context.announce_completed()
    self.result_queue.put({'message': message, 'error': had_error})
def multi_upload(self):
    """
    Performs multipart uploads.  It initiates the multipart upload.
    It creates a queue ``part_queue`` which is directly responsible
    with controlling the progress of the multipart upload.  It then
    creates ``UploadPartTasks`` for threads to run via the
    ``executer``.  This function waits for all of the parts in the
    multipart upload to finish, and then it completes the multipart
    upload.  This method waits on its parts to finish.  So, threads
    are required to process the parts for this function to complete.
    """
    # ``part_queue`` feeds part descriptors to the worker tasks;
    # ``complete_upload_queue`` is a priority queue so finished parts
    # come back ordered by part number for CompleteMultipartUpload.
    part_queue = NoBlockQueue(self.interrupt)
    complete_upload_queue = Queue.PriorityQueue()
    part_counter = MultiCounter()
    counter_lock = threading.Lock()
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(params, self.src)
    response_data, http = operate(self.service, 'CreateMultipartUpload',
                                  params)
    upload_id = response_data['UploadId']
    size_uploads = self.chunksize
    num_uploads = int(math.ceil(self.size/float(size_uploads)))
    # Part numbers are 1-based; each queue entry carries everything a
    # worker task needs to upload a single part.
    for i in range(1, (num_uploads + 1)):
        part_info = (self, upload_id, i, size_uploads)
        part_queue.put(part_info)
        task = UploadPartTask(session=self.session,
                              executer=self.executer,
                              part_queue=part_queue,
                              dest_queue=complete_upload_queue,
                              region=self.region,
                              printQueue=self.printQueue,
                              interrupt=self.interrupt,
                              part_counter=part_counter,
                              counter_lock=counter_lock)
        self.executer.submit(task)
    part_queue.join()
    # The following ensures that if the multipart upload is in progress,
    # all part uploads finish before aborting or completing.  This
    # really only applies when an interrupt signal is sent because the
    # ``part_queue.join()`` ensures this if the process is not
    # interrupted.
    while part_counter.count:
        time.sleep(0.1)
    parts_list = []
    while not complete_upload_queue.empty():
        part = complete_upload_queue.get()
        # Queue entries are (part_number, part_dict); only the part
        # dict is sent to the service.
        parts_list.append(part[1])
    if len(parts_list) == num_uploads:
        parts = {'Parts': parts_list}
        params = {'endpoint': self.endpoint, 'bucket': bucket,
                  'key': key, 'upload_id': upload_id,
                  'multipart_upload': parts}
        operate(self.service, 'CompleteMultipartUpload', params)
    else:
        # Some parts never completed (e.g. an interrupt): abort so the
        # partial upload does not keep accruing storage, then signal
        # failure to the caller.
        abort_params = {'endpoint': self.endpoint, 'bucket': bucket,
                        'key': key, 'upload_id': upload_id}
        operate(self.service, 'AbortMultipartUpload', abort_params)
        raise Exception()
def __call__(self):
    """Send CompleteMultipartUpload and queue a success/failure message."""
    fname = self.filename
    LOGGER.debug("Completing multipart upload for file: %s", fname.src)
    upload_id = self._upload_context.wait_for_upload_id()
    parts = self._upload_context.wait_for_parts_to_finish()
    LOGGER.debug("Received upload id and parts list.")
    bucket, key = find_bucket_key(fname.dest)
    params = {'bucket': bucket,
              'key': key,
              'endpoint': fname.endpoint,
              'upload_id': upload_id,
              'multipart_upload': {'Parts': parts}}
    try:
        operate(fname.service, 'CompleteMultipartUpload', params)
    except Exception as e:
        LOGGER.debug("Error trying to complete multipart upload: %s",
                     e, exc_info=True)
        message = print_operation(fname, failed=True,
                                  dryrun=self.parameters['dryrun'])
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
    else:
        LOGGER.debug("Multipart upload completed for: %s", fname.src)
        message = print_operation(fname, False, self.parameters['dryrun'])
        result = {'message': message, 'error': False}
    self._upload_context.announce_completed()
    self.result_queue.put(result)
def __call__(self):
    """Download one byte range of the object and write it into place."""
    total_file_size = self._filename.size
    start_range = self._part_number * self._chunk_size
    # The final part is requested as an open-ended range ("bytes=N-")
    # so any remainder beyond a whole chunk is still fetched.
    # NOTE(review): assumes part numbering/count agrees with
    # int(size / chunk) in the code that created these tasks — confirm
    # behavior for exact-multiple file sizes.
    if self._part_number == int(total_file_size / self._chunk_size) - 1:
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s",
                 range_param, self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
              'key': key, 'range': range_param}
    try:
        LOGGER.debug("Making GetObject requests with byte range: %s",
                     range_param)
        response_data, http = operate(self._service, 'GetObject', params)
        LOGGER.debug("Response received from GetObject")
        body = response_data['Body']
        self._write_to_file(body)
        # Tell the shared context this part is done so the overall
        # download can complete.
        self._context.announce_completed_part(self._part_number)
        message = print_operation(self._filename, 0)
        total_parts = int(self._filename.size / self._chunk_size)
        result = {'message': message, 'error': False,
                  'total_parts': total_parts}
        self._result_queue.put(result)
    except Exception as e:
        LOGGER.debug('Exception caught downloading byte range: %s',
                     e, exc_info=True)
        # Cancel the whole download so sibling part tasks stop, then
        # propagate the original error.
        self._context.cancel()
        raise e
def remove_bucket(self):
    """Delete the bucket named in ``self.src``."""
    bucket_name = find_bucket_key(self.src)[0]
    operate(self.service, 'DeleteBucket',
            {'endpoint': self.endpoint, 'bucket': bucket_name})
def set_size_from_s3(self):
    """Set ``self.size`` from a ``HeadObject`` call on the s3 object."""
    bucket, key = find_bucket_key(self.src)
    response_data, _ = operate(
        self.service, 'HeadObject',
        {'endpoint': self.endpoint, 'bucket': bucket, 'key': key})
    self.size = int(response_data['ContentLength'])
def create_multipart_upload(self):
    """Initiate a multipart upload and return its upload id."""
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    # Merge in acl/content-type/etc. object parameters.
    self._handle_object_params(params)
    response_data, _ = operate(self.service, 'CreateMultipartUpload',
                               params)
    return response_data['UploadId']
def __call__(self):
    """Delete the remote object once its download has fully completed."""
    LOGGER.debug("Waiting for download to finish.")
    self._context.wait_for_completion()
    bucket, key = find_bucket_key(self._filename.src)
    operate(self._filename.service, 'DeleteObject',
            {'endpoint': self._filename.endpoint,
             'bucket': bucket,
             'key': key})
def download(self):
    """
    Redirects the file to the multipart download function if the
    file is large.  If it is small enough, it gets the file as an
    object from s3.
    """
    bucket, key = find_bucket_key(self.src)
    response_data, _ = operate(
        self.service, 'GetObject',
        {'endpoint': self.endpoint, 'bucket': bucket, 'key': key})
    save_file(self.dest, response_data, self.last_update, self.is_stream)
def download(self):
    """
    Redirects the file to the multipart download function if the
    file is large.  If it is small enough, it gets the file as an
    object from s3.
    """
    bucket, key = find_bucket_key(self.src)
    response_data, _ = operate(
        self.service, 'GetObject',
        {'endpoint': self.endpoint, 'bucket': bucket, 'key': key})
    save_file(self.dest, response_data, self.last_update)
def copy(self):
    """Server-side copy of an s3 object to another s3 location."""
    # The copy source travels in the request, so it must be
    # URL-encoded; '/' and '~' stay literal.
    source = quote(self.src.encode('utf-8'), safe='/~')
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint,
              'bucket': bucket,
              'copy_source': source,
              'key': key}
    self._handle_object_params(params)
    operate(self.service, 'CopyObject', params)
def copy(self):
    """
    Copies a object in s3 to another location in s3.
    """
    # Bug fix: the copy source is sent in the request, so the
    # "bucket/key" string must be URL-encoded or keys containing
    # special characters fail.  '/' and '~' are kept literal so the
    # bucket/key separator survives (matches the sibling copy()
    # implementation in this file).
    copy_source = quote(self.src.encode('utf-8'), safe='/~')
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket,
              'copy_source': copy_source, 'key': key}
    self._handle_object_params(params)
    response_data, http = operate(self.service, 'CopyObject', params)
def make_bucket(self):
    """Create the bucket named in ``self.src``."""
    bucket = find_bucket_key(self.src)[0]
    params = {'endpoint': self.endpoint, 'bucket': bucket}
    # us-east-1 is the default location; only other regions get an
    # explicit LocationConstraint.
    if self.endpoint.region_name != 'us-east-1':
        params['create_bucket_configuration'] = {
            'LocationConstraint': self.endpoint.region_name}
    operate(self.service, 'CreateBucket', params)
def copy(self):
    """
    Copies a object in s3 to another location in s3.
    """
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint,
              'bucket': bucket,
              'copy_source': quote(self.src),
              'key': key}
    acl = self.parameters['acl']
    if acl:
        params['acl'] = acl[0]
    operate(self.service, 'CopyObject', params)
def delete(self):
    """
    Deletes the file from s3 or local.  The src file and type is
    used from the file info object.
    """
    if self.src_type != 's3':
        os.remove(self.src)
        return
    bucket, key = find_bucket_key(self.src)
    operate(self.service, 'DeleteObject',
            {'endpoint': self.endpoint, 'bucket': bucket, 'key': key})
def _cancel_upload(self, upload_id, filename):
    """Abort the in-progress multipart upload identified by upload_id."""
    bucket, key = find_bucket_key(filename.dest)
    params = {'bucket': bucket,
              'key': key,
              'upload_id': upload_id,
              'endpoint': filename.endpoint}
    LOGGER.debug("Aborting multipart upload for: %s", key)
    operate(filename.service, 'AbortMultipartUpload', params)
def _cancel_upload(self, upload_id, filename):
    """Abort the pending multipart upload so partial parts are discarded."""
    bucket, key = find_bucket_key(filename.dest)
    LOGGER.debug("Aborting multipart upload for: %s", key)
    operate(filename.service, 'AbortMultipartUpload',
            {'bucket': bucket,
             'key': key,
             'upload_id': upload_id,
             'endpoint': filename.endpoint})
def __call__(self):
    """Upload one part of a multipart server-side copy (UploadPartCopy)."""
    LOGGER.debug("Uploading part copy %s for filename: %s",
                 self._part_number, self._filename.src)
    total_file_size = self._filename.size
    # Part numbers are 1-based, so part N covers bytes starting at
    # (N - 1) * chunk_size; the final part runs to the last byte.
    start_range = (self._part_number - 1) * self._chunk_size
    if self._is_last_part(self._part_number):
        end_range = total_file_size - 1
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until the CreateMultipartUpload task publishes the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        src_bucket, src_key = find_bucket_key(self._filename.src)
        params = {
            'endpoint': self._filename.endpoint,
            'bucket': bucket,
            'key': key,
            'part_number': self._part_number,
            'upload_id': upload_id,
            'copy_source': '%s/%s' % (src_bucket, src_key),
            'copy_source_range': range_param
        }
        response_data, http = operate(self._filename.service,
                                      'UploadPartCopy', params)
        # Strip the quotes the service wraps around the ETag.
        etag = response_data['CopyPartResult']['ETag'][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {
            'message': message,
            'total_parts': self._total_parts(),
            'error': False
        }
        self._result_queue.put(result)
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part copy, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during upload part copy: %s', e,
                     exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
        self._result_queue.put(result)
        # Any other failure poisons the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Copy part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def _download_part(self):
    """GetObject one byte range, retrying transient read failures."""
    total_file_size = self._filename.size
    start_range = self._part_number * self._chunk_size
    # The final part uses an open-ended range ("bytes=N-") so any
    # remainder beyond a whole chunk is still fetched.
    if self._part_number == int(total_file_size / self._chunk_size) - 1:
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s",
                 range_param, self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {
        'endpoint': self._filename.endpoint,
        'bucket': bucket,
        'key': key,
        'range': range_param
    }
    for i in range(self.TOTAL_ATTEMPTS):
        try:
            LOGGER.debug("Making GetObject requests with byte range: %s",
                         range_param)
            response_data, http = operate(self._service, 'GetObject',
                                          params)
            LOGGER.debug("Response received from GetObject")
            body = response_data['Body']
            # Hand the streaming body off to the writer.
            self._queue_writes(body)
            self._context.announce_completed_part(self._part_number)
            message = print_operation(self._filename, 0)
            total_parts = int(self._filename.size / self._chunk_size)
            result = {
                'message': message,
                'error': False,
                'total_parts': total_parts
            }
            self._result_queue.put(result)
            LOGGER.debug("Task complete: %s", self)
            return
        except (socket.timeout, socket.error) as e:
            # Transient network failure: log and retry the same range.
            LOGGER.debug(
                "Socket timeout caught, retrying request, "
                "(attempt %s / %s)", i, self.TOTAL_ATTEMPTS,
                exc_info=True)
            continue
        except IncompleteReadError as e:
            # Body ended early; the range can safely be re-requested.
            LOGGER.debug("Incomplete read detected: %s, (attempt %s / %s)",
                         e, i, self.TOTAL_ATTEMPTS)
            continue
    raise RetriesExeededError("Maximum number of attempts exceeded: %s" %
                              self.TOTAL_ATTEMPTS)
def _handle_upload(self, body):
    """PutObject ``body`` to ``self.dest`` and verify the returned ETag."""
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint,
              'bucket': bucket,
              'key': key,
              'body': body}
    self._handle_object_params(params)
    response_data, _ = operate(self.service, 'PutObject', params)
    # Strip the surrounding quotes from the returned ETag, then rewind
    # the body so it can be re-read for the integrity check.
    etag = response_data['ETag'][1:-1]
    body.seek(0)
    check_etag(etag, body)
def __call__(self):
    """Upload one part of a multipart upload (file or stream source)."""
    LOGGER.debug("Uploading part %s for filename: %s",
                 self._part_number, self._filename.src)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until the CreateMultipartUpload task publishes the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        if self._filename.is_stream:
            # Streams have no size up front; the payload and the
            # expected part count come from the upload context.
            body = self._payload
            total = self._upload_context.expected_parts
        else:
            total = int(
                math.ceil(self._filename.size / float(self._chunk_size)))
            body = self._read_part()
        params = {
            'endpoint': self._filename.endpoint,
            'bucket': bucket,
            'key': key,
            'part_number': self._part_number,
            'upload_id': upload_id,
            'body': body
        }
        try:
            response_data, http = operate(self._filename.service,
                                          'UploadPart', params)
        finally:
            # Close the part buffer whether or not the request worked.
            body.close()
        # Strip the quotes the service wraps around the ETag.
        etag = response_data['ETag'][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {'message': message, 'total_parts': total,
                  'error': False}
        self._result_queue.put(PrintTask(**result))
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during part upload: %s', e, exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
        self._result_queue.put(PrintTask(**result))
        # One failed part cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def __call__(self):
    """
    Pull one part descriptor off ``part_queue``, upload it, and retry
    connection/md5 failures by re-queueing the part and resubmitting a
    copy of this task.
    """
    try:
        part_info = self.part_queue.get(True, QUEUE_TIMEOUT_GET)
        # Track in-flight parts so the producer can wait for them.
        with self.counter_lock:
            self.part_counter.count += 1
        try:
            filename = part_info[0]
            upload_id = part_info[1]
            part_number = part_info[2]
            part_size = part_info[3]
            body = self.read_part(filename, part_number, part_size)
            bucket, key = find_bucket_key(filename.dest)
            # On py2.6 the body is wrapped in StringIO; elsewhere a
            # bytearray is used.
            if sys.version_info[:2] == (2, 6):
                stream_body = StringIO(body)
            else:
                stream_body = bytearray(body)
            params = {'endpoint': self.endpoint, 'bucket': bucket,
                      'key': key, 'part_number': str(part_number),
                      'upload_id': upload_id, 'body': stream_body}
            response_data, http = operate(self.service, 'UploadPart',
                                          params)
            etag = retrieve_http_etag(http)
            # Integrity check: the ETag must match the md5 of the data
            # we sent; raises MD5Error otherwise.
            check_etag(etag, body)
            parts = {'ETag': etag, 'PartNumber': part_number}
            self.dest_queue.put((part_number, parts))
            print_str = print_operation(filename, 0)
            print_result = {'result': print_str}
            total = int(math.ceil(filename.size/float(part_size)))
            part_str = {'total': total}
            print_result['part'] = part_str
            self.printQueue.put(print_result)
        except requests.ConnectionError as e:
            connect_error = str(e)
            LOGGER.debug("%s part upload failure: %s" %
                         (part_info[0].src, connect_error))
            # Re-queue the part and schedule a fresh copy of this task
            # to retry it.
            self.part_queue.put(part_info)
            self.executer.submit(copy.copy(self))
        except MD5Error:
            LOGGER.debug("%s part upload failure: Data"
                         "was corrupted" % part_info[0].src)
            self.part_queue.put(part_info)
            self.executer.submit(copy.copy(self))
        except Exception as e:
            # NOTE(review): other errors are only logged; the part is
            # dropped and the upload comes up short — confirm the
            # completing code handles the missing part.
            LOGGER.debug('%s' % str(e))
        self.part_queue.task_done()
        with self.counter_lock:
            self.part_counter.count -= 1
    except Queue.Empty:
        pass
def __call__(self):
    """Upload one part of a multipart upload; cancels the upload on error."""
    LOGGER.debug("Uploading part %s for filename: %s",
                 self._part_number, self._filename.src)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until CreateMultipartUpload has published the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        if self._filename.is_stream:
            # Streams carry their payload and part count via the
            # upload context instead of reading from disk.
            body = self._payload
            total = self._upload_context.expected_parts
        else:
            total = int(math.ceil(
                self._filename.size/float(self._chunk_size)))
            body = self._read_part()
        params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
                  'key': key, 'part_number': self._part_number,
                  'upload_id': upload_id, 'body': body}
        try:
            response_data, http = operate(
                self._filename.service, 'UploadPart', params)
        finally:
            # Always release the part buffer.
            body.close()
        # Strip the quotes around the returned ETag.
        etag = response_data['ETag'][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {'message': message, 'total_parts': total,
                  'error': False}
        self._result_queue.put(PrintTask(**result))
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during part upload: %s', e, exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
        self._result_queue.put(PrintTask(**result))
        # One failed part cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def copy(self):
    """
    Copies a object in s3 to another location in s3.
    """
    source = quote(self.src)
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint,
              'bucket': bucket,
              'copy_source': source,
              'key': key}
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    operate(self.service, 'CopyObject', params)
def __call__(self):
    """Upload one part of a multipart server-side copy (UploadPartCopy)."""
    LOGGER.debug("Uploading part copy %s for filename: %s",
                 self._part_number, self._filename.src)
    total_file_size = self._filename.size
    # Part numbers are 1-based: part N starts at (N - 1) * chunk_size,
    # and the last part runs to the final byte.
    start_range = (self._part_number - 1) * self._chunk_size
    if self._is_last_part(self._part_number):
        end_range = total_file_size - 1
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until CreateMultipartUpload has published the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        src_bucket, src_key = find_bucket_key(self._filename.src)
        params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
                  'key': key, 'part_number': self._part_number,
                  'upload_id': upload_id,
                  'copy_source': '%s/%s' % (src_bucket, src_key),
                  'copy_source_range': range_param}
        response_data, http = operate(
            self._filename.service, 'UploadPartCopy', params)
        # Strip the quotes wrapped around the returned ETag.
        etag = response_data['CopyPartResult']['ETag'][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {'message': message,
                  'total_parts': self._total_parts(),
                  'error': False}
        self._result_queue.put(result)
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part copy, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during upload part copy: %s', e,
                     exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
        self._result_queue.put(result)
        # Any other failure cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Copy part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def __call__(self):
    """
    Download one ranged chunk and write it at its offset in the shared
    destination file, retrying connection errors by resubmitting.
    """
    try:
        part_info = self.part_queue.get(True, QUEUE_TIMEOUT_GET)
        # Track in-flight parts so the producer can wait for them.
        with self.counter_lock:
            self.part_counter.count += 1
        filename = part_info[0]
        part_number = part_info[1]
        size_uploads = part_info[2]
        last_part_number = int(filename.size / size_uploads) - 1
        beginning_range = part_number*size_uploads
        str_range = "bytes="
        # The final part is open-ended so the remainder is included.
        if part_number == last_part_number:
            str_range += str(beginning_range) + "-"
        else:
            end_range = beginning_range + size_uploads - 1
            str_range += str(beginning_range) + "-" + str(end_range)
        bucket, key = find_bucket_key(filename.src)
        try:
            params = {'endpoint': self.endpoint, 'bucket': bucket,
                      'key': key, 'range': str_range}
            response_data, http = operate(self.service, 'GetObject',
                                          params)
            body = response_data['Body'].read()
            # Serialize seek+write pairs so concurrent parts don't
            # interleave their writes.
            with self.write_lock:
                self.f.seek(part_number*size_uploads)
                self.f.write(body)
            print_str = print_operation(filename, 0)
            print_result = {'result': print_str}
            part_str = {'total': int(filename.size / size_uploads)}
            print_result['part'] = part_str
            self.printQueue.put(print_result)
            self.dest_queue.put(part_number)
        except requests.ConnectionError as e:
            connect_error = str(e)
            LOGGER.debug("%s part download failure: %s" %
                         (part_info[0].src, connect_error))
            # Re-queue the part and resubmit this task to retry it.
            self.part_queue.put(part_info)
            self.executer.submit(self)
        except Exception as e:
            # NOTE(review): non-connection errors are only logged and
            # the part is dropped — verify callers detect the gap.
            LOGGER.debug('%s' % str(e))
        self.part_queue.task_done()
        with self.counter_lock:
            self.part_counter.count -= 1
    except Queue.Empty:
        pass
def upload(self):
    """
    Redirects the file to the multipart upload function if the file
    is large.  If it is small enough, it puts the file as an object
    in s3.
    """
    with open(self.src, 'rb') as body:
        bucket, key = find_bucket_key(self.dest)
        params = {'endpoint': self.endpoint,
                  'bucket': bucket,
                  'key': key,
                  'body': body}
        self._handle_object_params(params)
        response_data, _ = operate(self.service, 'PutObject', params)
        # Strip quotes from the ETag, rewind, and verify the md5.
        etag = response_data['ETag'][1:-1]
        body.seek(0)
        check_etag(etag, body)
def upload(self):
    """
    Redirects the file to the multipart upload function if the file
    is large.  If it is small enough, it puts the file as an object
    in s3.
    """
    body = read_file(self.src)
    bucket, key = find_bucket_key(self.dest)
    # py2.6 gets a StringIO wrapper; later versions use a bytearray.
    if sys.version_info[:2] == (2, 6):
        stream_body = StringIO(body)
    else:
        stream_body = bytearray(body)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    # Zero-byte files are uploaded without a body parameter.
    if body:
        params['body'] = stream_body
    self._handle_object_params(params)
    response_data, http = operate(self.service, 'PutObject', params)
    check_etag(retrieve_http_etag(http), body)
def __call__(self):
    """Upload one part of a multipart server-side copy (UploadPartCopy)."""
    LOGGER.debug("Uploading part copy %s for filename: %s",
                 self._part_number, self._filename.src)
    total_file_size = self._filename.size
    # Part numbers are 1-based: part N starts at (N - 1) * chunk_size
    # and the final part runs to the last byte of the file.
    start_range = (self._part_number - 1) * self._chunk_size
    if self._is_last_part(self._part_number):
        end_range = total_file_size - 1
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = "bytes=%s-%s" % (start_range, end_range)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until CreateMultipartUpload has published the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        src_bucket, src_key = find_bucket_key(self._filename.src)
        params = {
            "endpoint": self._filename.endpoint,
            "bucket": bucket,
            "key": key,
            "part_number": self._part_number,
            "upload_id": upload_id,
            "copy_source": "%s/%s" % (src_bucket, src_key),
            "copy_source_range": range_param,
        }
        response_data, http = operate(self._filename.service,
                                      "UploadPartCopy", params)
        # Strip the quotes wrapped around the returned ETag.
        etag = response_data["CopyPartResult"]["ETag"][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {"message": message,
                  "total_parts": self._total_parts(),
                  "error": False}
        self._result_queue.put(result)
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part copy, task has been cancelled.")
    except Exception as e:
        LOGGER.debug("Error during upload part copy: %s", e,
                     exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += "\n" + str(e)
        result = {"message": message, "error": True}
        self._result_queue.put(result)
        # Any other failure cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Copy part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def _download_part(self):
    """GetObject one byte range, retrying transient network failures."""
    total_file_size = self._filename.size
    start_range = self._part_number * self._chunk_size
    # The final part uses an open-ended range ("bytes=N-") so any
    # remainder beyond a whole chunk is still fetched.
    if self._part_number == int(total_file_size / self._chunk_size) - 1:
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s",
                 range_param, self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
              'key': key, 'range': range_param}
    for i in range(self.TOTAL_ATTEMPTS):
        try:
            LOGGER.debug("Making GetObject requests with byte range: %s",
                         range_param)
            response_data, http = operate(self._service, 'GetObject',
                                          params)
            LOGGER.debug("Response received from GetObject")
            body = response_data['Body']
            # Hand the streaming body off to the writer.
            self._queue_writes(body)
            self._context.announce_completed_part(self._part_number)
            message = print_operation(self._filename, 0)
            total_parts = int(self._filename.size / self._chunk_size)
            result = {'message': message, 'error': False,
                      'total_parts': total_parts}
            self._result_queue.put(result)
            LOGGER.debug("Task complete: %s", self)
            return
        except (socket.timeout, socket.error) as e:
            # Transient network failure: log and retry the same range.
            LOGGER.debug("Socket timeout caught, retrying request, "
                         "(attempt %s / %s)", i, self.TOTAL_ATTEMPTS,
                         exc_info=True)
            continue
        except IncompleteReadError as e:
            # Body ended early; the range can safely be re-requested.
            LOGGER.debug("Incomplete read detected: %s, (attempt %s / %s)",
                         e, i, self.TOTAL_ATTEMPTS)
            continue
    raise RetriesExeededError("Maximum number of attempts exceeded: %s" %
                              self.TOTAL_ATTEMPTS)
def __call__(self):
    """Upload one part of a multipart upload; cancels the upload on error."""
    LOGGER.debug("Uploading part %s for filename: %s",
                 self._part_number, self._filename.src)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until CreateMultipartUpload has published the id.
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        total = int(math.ceil(
            self._filename.size / float(self._chunk_size)))
        body = self._read_part()
        params = {
            "endpoint": self._filename.endpoint,
            "bucket": bucket,
            "key": key,
            "part_number": self._part_number,
            "upload_id": upload_id,
            "body": body,
        }
        try:
            response_data, http = operate(self._filename.service,
                                          "UploadPart", params)
        finally:
            # Close the part buffer whether or not the request worked.
            body.close()
        # Strip the quotes wrapped around the returned ETag.
        etag = response_data["ETag"][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        message = print_operation(self._filename, 0)
        result = {"message": message, "total_parts": total,
                  "error": False}
        self._result_queue.put(result)
    except UploadCancelledError as e:
        # We don't need to do anything in this case. The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part, task has been cancelled.")
    except Exception as e:
        LOGGER.debug("Error during part upload: %s", e, exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += "\n" + str(e)
        result = {"message": message, "error": True}
        self._result_queue.put(result)
        # One failed part cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def upload(self):
    """
    Redirects the file to the multipart upload function if the file
    is large.  If it is small enough, it puts the file as an object
    in s3.
    """
    if self.is_multi:
        self.multi_upload()
        return
    body = read_file(self.src)
    bucket, key = find_bucket_key(self.dest)
    # py2.6 gets a StringIO wrapper; later versions use a bytearray.
    if sys.version_info[:2] == (2, 6):
        stream_body = StringIO(body)
    else:
        stream_body = bytearray(body)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    # Zero-byte files are uploaded without a body parameter.
    if body:
        params['body'] = stream_body
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    response_data, http = operate(self.service, 'PutObject', params)
    check_etag(retrieve_http_etag(http), body)
def __call__(self):
    """Upload one part, reporting its ETag from the http response headers."""
    LOGGER.debug("Uploading part %s for filename: %s",
                 self._part_number, self._filename.src)
    try:
        LOGGER.debug("Waiting for upload id.")
        # Blocks until CreateMultipartUpload has published the id.
        upload_id = self._upload_context.wait_for_upload_id()
        body = self._read_part()
        bucket, key = find_bucket_key(self._filename.dest)
        # py2.6 gets a StringIO wrapper; later versions a bytearray.
        if sys.version_info[:2] == (2, 6):
            body = StringIO(body)
        else:
            body = bytearray(body)
        params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
                  'key': key, 'part_number': str(self._part_number),
                  'upload_id': upload_id, 'body': body}
        response_data, http = operate(
            self._filename.service, 'UploadPart', params)
        # The ETag comes from the raw http response here, not the
        # parsed response data.
        etag = retrieve_http_etag(http)
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)
        print_str = print_operation(self._filename, 0)
        print_result = {'result': print_str}
        total = int(math.ceil(
            self._filename.size/float(self._chunk_size)))
        part_str = {'total': total}
        print_result['part'] = part_str
        self._print_queue.put(print_result)
    except Exception as e:
        LOGGER.debug('Error during part upload: %s', e, exc_info=True)
        # Any failure cancels the whole multipart upload.
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def upload(self):
    """
    Redirects the file to the multipart upload function if the file
    is large.  If it is small enough, it puts the file as an object
    in s3.
    """
    if self.is_multi:
        self.multi_upload()
        return
    body = read_file(self.src)
    bucket, key = find_bucket_key(self.dest)
    # py2.6 gets a StringIO wrapper; later versions use a bytearray.
    if sys.version_info[:2] == (2, 6):
        stream_body = StringIO(body)
    else:
        stream_body = bytearray(body)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    # Zero-byte files are uploaded without a body parameter.
    if body:
        params['body'] = stream_body
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(params, self.src)
    response_data, http = operate(self.service, 'PutObject', params)
    check_etag(retrieve_http_etag(http), body)
def multi_upload(self):
    """
    Performs multipart uploads.  It initiates the multipart upload.
    It creates a queue ``part_queue`` which is directly responsible
    with controlling the progress of the multipart upload.  It then
    creates ``UploadPartTasks`` for threads to run via the
    ``executer``.  This function waits for all of the parts in the
    multipart upload to finish, and then it completes the multipart
    upload.  This method waits on its parts to finish.  So, threads
    are required to process the parts for this function to complete.
    """
    # ``part_queue`` feeds part descriptors to the worker tasks;
    # ``complete_upload_queue`` is a priority queue so finished parts
    # come back ordered by part number for CompleteMultipartUpload.
    part_queue = NoBlockQueue(self.interrupt)
    complete_upload_queue = Queue.PriorityQueue()
    part_counter = MultiCounter()
    counter_lock = threading.Lock()
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    if self.parameters['acl']:
        params['acl'] = self.parameters['acl'][0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(params, self.src)
    response_data, http = operate(self.service, 'CreateMultipartUpload',
                                  params)
    upload_id = response_data['UploadId']
    size_uploads = self.chunksize
    num_uploads = int(math.ceil(self.size / float(size_uploads)))
    # Part numbers are 1-based; each queue entry carries everything a
    # worker task needs to upload a single part.
    for i in range(1, (num_uploads + 1)):
        part_info = (self, upload_id, i, size_uploads)
        part_queue.put(part_info)
        task = UploadPartTask(session=self.session,
                              executer=self.executer,
                              part_queue=part_queue,
                              dest_queue=complete_upload_queue,
                              region=self.region,
                              printQueue=self.printQueue,
                              interrupt=self.interrupt,
                              part_counter=part_counter,
                              counter_lock=counter_lock)
        self.executer.submit(task)
    part_queue.join()
    # The following ensures that if the multipart upload is in progress,
    # all part uploads finish before aborting or completing.  This
    # really only applies when an interrupt signal is sent because the
    # ``part_queue.join()`` ensures this if the process is not
    # interrupted.
    while part_counter.count:
        time.sleep(0.1)
    parts_list = []
    while not complete_upload_queue.empty():
        part = complete_upload_queue.get()
        # Queue entries are (part_number, part_dict); only the part
        # dict is sent to the service.
        parts_list.append(part[1])
    if len(parts_list) == num_uploads:
        parts = {'Parts': parts_list}
        params = {
            'endpoint': self.endpoint,
            'bucket': bucket,
            'key': key,
            'upload_id': upload_id,
            'multipart_upload': parts
        }
        operate(self.service, 'CompleteMultipartUpload', params)
    else:
        # Some parts never completed (e.g. an interrupt): abort so the
        # partial upload does not keep accruing storage, then signal
        # failure to the caller.
        abort_params = {
            'endpoint': self.endpoint,
            'bucket': bucket,
            'key': key,
            'upload_id': upload_id
        }
        operate(self.service, 'AbortMultipartUpload', abort_params)
        raise Exception()
def _cancel_upload(self, upload_id, filename):
    """Issue AbortMultipartUpload for the given pending upload."""
    bucket, key = find_bucket_key(filename.dest)
    LOGGER.debug("Aborting multipart upload for: %s", key)
    params = {"bucket": bucket,
              "key": key,
              "upload_id": upload_id,
              "endpoint": filename.endpoint}
    operate(filename.service, "AbortMultipartUpload", params)