def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
    """Hand an S3-to-S3 copy of ``fileinfo`` to the transfer manager.

    ``fileinfo.dest`` and ``fileinfo.src`` are each split into a
    bucket/key pair with ``find_bucket_key``.  The return value is
    whatever ``self._transfer_manager.copy`` returns.
    """
    dest_bucket, dest_key = find_bucket_key(fileinfo.dest)
    src_bucket, src_key = find_bucket_key(fileinfo.src)
    copy_kwargs = {
        'bucket': dest_bucket,
        'key': dest_key,
        'copy_source': {'Bucket': src_bucket, 'Key': src_key},
        'extra_args': extra_args,
        'subscribers': subscribers,
        'source_client': fileinfo.source_client,
    }
    return self._transfer_manager.copy(**copy_kwargs)
def copy(self):
    """
    Copy the S3 object at ``self.src`` to ``self.dest`` with a single
    ``copy_object`` call on ``self.client``.
    """
    src_bucket, src_key = find_bucket_key(self.src)
    dest_bucket, dest_key = find_bucket_key(self.dest)
    request = {
        'Bucket': dest_bucket,
        'CopySource': {'Bucket': src_bucket, 'Key': src_key},
        'Key': dest_key,
    }
    # Both helpers receive the request dict and the mapped parameters
    # are sent with it.
    self._inject_content_type(request)
    RequestParamsMapper.map_copy_object_params(request, self.parameters)
    response_data = self.client.copy_object(**request)
def __call__(self):
    """
    Copy one byte range of the source object as a multipart-upload part.

    The range is derived from the part number (1-based) and the chunk
    size; the last part ends at the final byte of the source.  On
    success the part's ETag is announced to the upload context and a
    progress result is queued.  A cancelled upload is only logged; any
    other failure queues an error result and cancels the upload.
    """
    part_number = self._part_number
    LOGGER.debug("Uploading part copy %s for filename: %s",
                 part_number, self._filename.src)
    total_file_size = self._filename.size
    first_byte = (part_number - 1) * self._chunk_size
    last_byte = (total_file_size - 1
                 if self._is_last_part(part_number)
                 else first_byte + self._chunk_size - 1)
    byte_range = 'bytes=%s-%s' % (first_byte, last_byte)
    try:
        LOGGER.debug("Waiting for upload id.")
        upload_id = self._upload_context.wait_for_upload_id()
        dest_bucket, dest_key = find_bucket_key(self._filename.dest)
        src_bucket, src_key = find_bucket_key(self._filename.src)
        request = {
            'endpoint': self._filename.endpoint,
            'bucket': dest_bucket,
            'key': dest_key,
            'part_number': part_number,
            'upload_id': upload_id,
            'copy_source': '%s/%s' % (src_bucket, src_key),
            'copy_source_range': byte_range,
        }
        response_data, http = operate(
            self._filename.service, 'UploadPartCopy', request)
        # The ETag comes back wrapped in one character on each side;
        # drop them before recording it.
        self._upload_context.announce_finished_part(
            etag=response_data['CopyPartResult']['ETag'][1:-1],
            part_number=part_number)
        self._result_queue.put({
            'message': print_operation(self._filename, 0),
            'total_parts': self._total_parts(),
            'error': False,
        })
    except UploadCancelledError as e:
        # Nothing to do: the task that cancelled the upload has already
        # queued a message about it.
        LOGGER.debug("Not uploading part copy, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during upload part copy: %s', e,
                     exc_info=True)
        failure_text = print_operation(self._filename, failed=True,
                                       dryrun=False)
        failure_text += '\n' + str(e)
        self._result_queue.put({'message': failure_text, 'error': True})
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Copy part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def __call__(self):
    """
    Download one byte range of ``self._filename.src`` and write it out.

    The part number is 0-based.  The number of parts is
    ``int(size / chunk_size)``; the last part uses an open-ended range
    (``bytes=<start>-``) so it also picks up any remainder.  On success
    the completed part is announced to the download context and a
    progress result is put on ``self._result_queue``.

    Raises:
        Exception: whatever the ``GetObject`` call or the write raised,
            re-raised unchanged after the download context has been
            cancelled.
    """
    total_file_size = self._filename.size
    total_parts = int(total_file_size / self._chunk_size)
    start_range = self._part_number * self._chunk_size
    if self._part_number == total_parts - 1:
        # Open-ended range: read through to the end of the object.
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                 self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
              'key': key, 'range': range_param}
    try:
        LOGGER.debug("Making GetObject requests with byte range: %s",
                     range_param)
        response_data, http = operate(self._service, 'GetObject', params)
        LOGGER.debug("Response received from GetObject")
        body = response_data['Body']
        self._write_to_file(body)
        self._context.announce_completed_part(self._part_number)
        message = print_operation(self._filename, 0)
        result = {'message': message, 'error': False,
                  'total_parts': total_parts}
        self._result_queue.put(result)
    except Exception as e:
        LOGGER.debug(
            'Exception caught downloading byte range: %s', e,
            exc_info=True)
        self._context.cancel()
        # Bare ``raise`` re-raises with the original traceback intact;
        # ``raise e`` would restart it from this line on Python 2.
        raise
def __call__(self):
    """
    Issue ``CompleteMultipartUpload`` for ``self.filename``.

    The upload id and the list of parts are obtained from the upload
    context through its ``wait_for_*`` calls.  Whether the request
    succeeds or raises, a result dict with ``message`` and ``error``
    keys is put on ``self.result_queue``; exceptions from the request
    are logged and reported through that dict rather than propagated.
    """
    LOGGER.debug("Completing multipart upload for file: %s", self.filename.src)
    upload_id = self._upload_context.wait_for_upload_id()
    parts = self._upload_context.wait_for_parts_to_finish()
    LOGGER.debug("Received upload id and parts list.")
    bucket, key = find_bucket_key(self.filename.dest)
    params = {
        "bucket": bucket,
        "key": key,
        "endpoint": self.filename.endpoint,
        "upload_id": upload_id,
        "multipart_upload": {"Parts": parts},
    }
    try:
        operate(self.filename.service, "CompleteMultipartUpload", params)
    except Exception as e:
        LOGGER.debug("Error trying to complete multipart upload: %s", e, exc_info=True)
        message = print_operation(self.filename, failed=True, dryrun=self.parameters["dryrun"])
        # Append the exception text so the reported message carries it.
        message += "\n" + str(e)
        result = {"message": message, "error": True}
    else:
        LOGGER.debug("Multipart upload completed for: %s", self.filename.src)
        message = print_operation(self.filename, False, self.parameters["dryrun"])
        result = {"message": message, "error": False}
        # NOTE(review): assumed to belong to the success branch only;
        # the flattened source does not show the nesting -- confirm.
        self._upload_context.announce_completed()
    self.result_queue.put(result)
def __call__(self):
    """
    Issue ``CompleteMultipartUpload`` for ``self.filename``.

    The upload id and the list of parts are obtained from the upload
    context through its ``wait_for_*`` calls.  Whether the request
    succeeds or raises, a result dict with ``message`` and ``error``
    keys is put on ``self.result_queue``; exceptions from the request
    are logged and reported through that dict rather than propagated.
    """
    LOGGER.debug("Completing multipart upload for file: %s",
                 self.filename.src)
    upload_id = self._upload_context.wait_for_upload_id()
    parts = self._upload_context.wait_for_parts_to_finish()
    LOGGER.debug("Received upload id and parts list.")
    bucket, key = find_bucket_key(self.filename.dest)
    params = {
        'bucket': bucket, 'key': key,
        'endpoint': self.filename.endpoint,
        'upload_id': upload_id,
        'multipart_upload': {'Parts': parts},
    }
    try:
        operate(self.filename.service, 'CompleteMultipartUpload', params)
    except Exception as e:
        LOGGER.debug("Error trying to complete multipart upload: %s",
                     e, exc_info=True)
        message = print_operation(
            self.filename, failed=True,
            dryrun=self.parameters['dryrun'])
        # Append the exception text so the reported message carries it.
        message += '\n' + str(e)
        result = {
            'message': message,
            'error': True
        }
    else:
        LOGGER.debug("Multipart upload completed for: %s",
                     self.filename.src)
        message = print_operation(self.filename, False,
                                  self.parameters['dryrun'])
        result = {'message': message, 'error': False}
        # NOTE(review): assumed to belong to the success branch only;
        # the flattened source does not show the nesting -- confirm.
        self._upload_context.announce_completed()
    self.result_queue.put(result)
def _run_main(self, parsed_args, parsed_globals):
    """
    Run the listing and return the command's return code.

    With no bucket in the path all buckets are listed; otherwise the
    objects under the bucket/key are listed, recursively when
    ``parsed_args.dir_op`` is set.  When a key was given the return code
    comes from ``self._check_no_objects()``; otherwise it is 0.
    """
    super(ListCommand, self)._run_main(parsed_args, parsed_globals)
    # Reset the per-run bookkeeping.
    self._empty_result = False
    self._at_first_page = True
    self._size_accumulator = 0
    self._total_objects = 0
    self._human_readable = parsed_args.human_readable

    target = parsed_args.paths
    if target.startswith('s3://'):
        target = target[5:]
    bucket, key = find_bucket_key(target)

    if bucket:
        if parsed_args.dir_op:
            # Then --recursive was specified.
            list_objects = self._list_all_objects_recursive
        else:
            list_objects = self._list_all_objects
        list_objects(bucket, key, parsed_args.page_size)
    else:
        self._list_all_buckets()

    if parsed_args.summarize:
        self._print_summary()

    if not key:
        # Listing all buckets, or a bucket with no key: always rc 0.
        # Applicable errors (e.g. a bucket that does not exist) are
        # supplied by the server and are thrown before this point.
        return 0
    # A key was specified: the rc depends on whether any matching keys
    # and/or prefixes were found.
    return self._check_no_objects()
def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
    """Hand a download of ``fileinfo.src`` to the transfer manager.

    The output file object comes from ``self._get_fileout``.  The return
    value is whatever ``self._transfer_manager.download`` returns.
    """
    src_bucket, src_key = find_bucket_key(fileinfo.src)
    download_kwargs = {
        'fileobj': self._get_fileout(fileinfo),
        'bucket': src_bucket,
        'key': src_key,
        'extra_args': extra_args,
        'subscribers': subscribers,
    }
    return self._transfer_manager.download(**download_kwargs)
def create_multipart_upload(self):
    """Start a multipart upload to ``self.dest`` and return its upload id."""
    dest_bucket, dest_key = find_bucket_key(self.dest)
    request = {'Bucket': dest_bucket, 'Key': dest_key}
    self._handle_object_params(request)
    created = self.client.create_multipart_upload(**request)
    return created['UploadId']
def _list_single_object(self, s3_path):
    """
    Look up a single object with ``HeadObject`` instead of listing.

    Returns:
        ``(s3_path, response)``, where ``response`` is the
        ``head_object`` response with ``ContentLength`` moved to an
        integer ``Size`` entry and ``LastModified`` converted to the
        local timezone.

    Raises:
        ClientError: re-raised unchanged unless the error code is
            ``'404'``; for a 404 a new ``ClientError`` is raised whose
            message names the missing key.
    """
    # When we know we're dealing with a single object, we can avoid
    # a ListObjects operation (which causes concern for anyone setting
    # IAM policies with the smallest set of permissions needed) and
    # instead use a HeadObject request.
    bucket, key = find_bucket_key(s3_path)
    try:
        params = {'Bucket': bucket, 'Key': key}
        params.update(self.request_parameters.get('HeadObject', {}))
        response = self._client.head_object(**params)
    except ClientError as e:
        # We want to try to give a more helpful error message.
        # This is what the customer is going to see so we want to
        # give as much detail as we have.
        if e.response['Error']['Code'] != '404':
            raise
        # The key does not exist so we'll raise a more specific
        # error message here.  ``copy()`` is shallow, so replace the
        # nested ``Error`` dict with a new one rather than writing into
        # the dict still owned by the caught exception.
        response = e.response.copy()
        response['Error'] = dict(
            response['Error'],
            Message='Key "%s" does not exist' % key)
        raise ClientError(response, 'HeadObject')
    response['Size'] = int(response.pop('ContentLength'))
    last_update = parse(response['LastModified'])
    response['LastModified'] = last_update.astimezone(tzlocal())
    return s3_path, response
def multi_upload(self):
    """
    Performs multipart uploads.  It initiates the multipart upload.
    It creates a queue ``part_queue`` which is directly responsible
    for controlling the progress of the multipart upload.  It then
    creates ``UploadPartTasks`` for threads to run via the
    ``executer``.  This function waits for all of the parts in the
    multipart upload to finish, and then it completes the multipart
    upload.  This method waits on its parts to finish.  So, threads
    are required to process the parts for this function to complete.

    If fewer finished parts are collected than were queued, the upload
    is aborted with ``AbortMultipartUpload`` and a bare ``Exception``
    is raised.
    """
    part_queue = NoBlockQueue(self.interrupt)
    complete_upload_queue = Queue.PriorityQueue()
    part_counter = MultiCounter()
    counter_lock = threading.Lock()
    bucket, key = find_bucket_key(self.dest)
    params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
    if self.parameters['acl']:
        # Only the first entry of the acl parameter is sent.
        params['acl'] = self.parameters['acl'][0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(params, self.src)
    response_data, http = operate(self.service, 'CreateMultipartUpload',
                                  params)
    upload_id = response_data['UploadId']
    size_uploads = self.chunksize
    # Round up so a trailing partial chunk still gets its own part.
    num_uploads = int(math.ceil(self.size/float(size_uploads)))
    # Part numbers start at 1.
    # NOTE(review): one task is assumed to be submitted per queued part;
    # the flattened source does not show where the loop body ends.
    for i in range(1, (num_uploads + 1)):
        part_info = (self, upload_id, i, size_uploads)
        part_queue.put(part_info)
        task = UploadPartTask(session=self.session, executer=self.executer,
                              part_queue=part_queue,
                              dest_queue=complete_upload_queue,
                              region=self.region,
                              printQueue=self.printQueue,
                              interrupt=self.interrupt,
                              part_counter=part_counter,
                              counter_lock=counter_lock)
        self.executer.submit(task)
    part_queue.join()
    # The following ensures that if the multipart upload is in progress,
    # all part uploads finish before aborting or completing.  This
    # really only applies when an interrupt signal is sent because the
    # ``part_queue.join()`` ensures this if the process is not
    # interrupted.
    while part_counter.count:
        time.sleep(0.1)
    parts_list = []
    # Drain the priority queue; element [1] of each entry is what goes
    # into the ``Parts`` list.
    while not complete_upload_queue.empty():
        part = complete_upload_queue.get()
        parts_list.append(part[1])
    if len(parts_list) == num_uploads:
        parts = {'Parts': parts_list}
        params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key,
                  'upload_id': upload_id, 'multipart_upload': parts}
        operate(self.service, 'CompleteMultipartUpload', params)
    else:
        # Not every part finished: abort the upload and signal failure.
        abort_params = {'endpoint': self.endpoint, 'bucket': bucket,
                        'key': key, 'upload_id': upload_id}
        operate(self.service, 'AbortMultipartUpload', abort_params)
        raise Exception()
def list_objects(self, s3_path, dir_op): """ This function yields the appropriate object or objects under a common prefix depending if the operation is on objects under a common prefix. It yields the file's source path, size, and last update. """ # Short circuit path: if we are not recursing into the s3 # bucket and a specific path was given, we can just yield # that path and not have to call any operation in s3. bucket, prefix = find_bucket_key(s3_path) if not dir_op and prefix: yield self._list_single_object(s3_path) else: lister = BucketLister(self._client) for key in lister.list_objects(bucket=bucket, prefix=prefix, page_size=self.page_size): source_path, response_data = key if response_data['Size'] == 0 and source_path.endswith('/'): if self.operation_name == 'delete': # This is to filter out manually created folders # in S3. They have a size zero and would be # undesirably downloaded. Local directories # are automatically created when they do not # exist locally. But user should be able to # delete them. yield source_path, response_data elif not dir_op and s3_path != source_path: pass else: yield source_path, response_data
def remove_bucket(self):
    """
    Send a ``DeleteBucket`` request for the bucket named in ``self.src``.
    """
    bucket_name, _unused_key = find_bucket_key(self.src)
    request = dict(endpoint=self.endpoint, bucket=bucket_name)
    response_data, http = operate(self.service, 'DeleteBucket', request)
def _list_single_object(self, s3_path):
    """
    Look up a single object with ``HeadObject`` instead of listing.

    Returns ``(s3_path, response)`` where ``response`` has an integer
    ``Size`` entry (taken from ``ContentLength``) and a ``LastModified``
    converted to the local timezone.  A ``ClientError`` whose message is
    not ``'Not Found'`` is re-raised as is; otherwise a new
    ``ClientError`` with a rewritten message is raised.
    """
    # A HeadObject request is enough for a known single object and
    # avoids ListObjects, which matters to anyone setting IAM policies
    # with the smallest set of permissions needed.
    bucket, key = find_bucket_key(s3_path)
    try:
        response = self._client.head_object(Bucket=bucket, Key=key)
    except ClientError as e:
        # Give the customer as much detail as we have.
        error_fields = e.__dict__.copy()
        if e.error_message != 'Not Found':
            raise
        if e.http_status_code != 404:
            reason = six.moves.http_client.responses[
                e.http_status_code]
            error_fields.update(error_code=reason, error_message=reason)
        else:
            # The key does not exist, so say that explicitly.
            error_fields['error_message'] = (
                'Key "%s" does not exist' % key)
        raise ClientError(**error_fields)
    response['Size'] = int(response.pop('ContentLength'))
    modified = parse(response['LastModified'])
    response['LastModified'] = modified.astimezone(tzlocal())
    return s3_path, response
def list_objects(self, s3_path, dir_op):
    """
    This function yields the appropriate object or objects under a
    common prefix depending on whether the operation is on objects
    under a common prefix.  It yields the file's source path, size,
    and last update (converted to the local timezone).
    """
    operation = self._service.get_operation('ListObjects')
    bucket, prefix = find_bucket_key(s3_path)
    iterator = operation.paginate(self._endpoint, bucket=bucket,
                                  prefix=prefix)
    for html_response, response_data in iterator:
        contents = response_data['Contents']
        for content in contents:
            # Source paths are reported as "<bucket>/<key>".
            src_path = bucket + '/' + content['Key']
            size = content['Size']
            last_update = parse(content['LastModified'])
            last_update = last_update.astimezone(tzlocal())
            # NOTE(review): the ``elif``/``else`` below are assumed to
            # pair with this outer ``if``; the flattened source does not
            # show the nesting -- confirm.
            if size == 0 and src_path.endswith('/'):
                if self.operation_name == 'delete':
                    # This is to filter out manually created folders
                    # in S3.  They have a size zero and would be
                    # undesirably downloaded.  Local directories
                    # are automatically created when they do not
                    # exist locally.  But user should be able to
                    # delete them.
                    yield src_path, size, last_update
            elif not dir_op and s3_path != src_path:
                # Single-object operation: skip anything that is not
                # exactly the requested path.
                pass
            else:
                yield src_path, size, last_update
def check_src_path(self, paths):
    """
    This checks the source path (``paths[0]``) to determine whether it
    is valid.

    The check performed in S3 is first it lists the objects using the
    source path.  If there is an error like the bucket does not exist,
    the error will be caught with the ``check_error()`` function.  If
    the operation is on a single object in s3, it checks that a list of
    objects was returned and that the first object listed is the one
    specified on the command line.  If the operation is on objects
    under a common prefix, it will check that there are common prefixes
    or objects under the specified prefix.  Nothing is checked in S3
    for the ``ls``, ``mb`` and ``rb`` commands.

    For local files, it first checks that the path exists.  Then it
    checks that the path is a directory if it is a directory operation
    or that the path is a file if the operation is on a single file.

    Every failed check raises a plain ``Exception`` with an
    ``Error: ...`` message.
    """
    src_path = paths[0]
    dir_op = self.parameters['dir_op']
    if src_path.startswith('s3://'):
        if self.cmd in ['ls', 'mb', 'rb']:
            return
        session = self.session
        service = session.get_service('s3')
        endpoint = service.get_endpoint(self.parameters['region'])
        # Drop the "s3://" scheme before splitting into bucket and key.
        src_path = src_path[5:]
        if dir_op:
            if not src_path.endswith('/'):
                src_path += '/'  # all prefixes must end with a /
        bucket, key = find_bucket_key(src_path)
        operation = service.get_operation('ListObjects')
        html_response, response_data = operation.call(endpoint,
                                                      bucket=bucket,
                                                      prefix=key,
                                                      delimiter='/')
        check_error(response_data)
        contents = response_data['Contents']
        common_prefixes = response_data['CommonPrefixes']
        if not dir_op:
            # Single object: the first listed key must be exactly the
            # requested key.
            if contents:
                if contents[0]['Key'] == key:
                    pass
                else:
                    raise Exception("Error: S3 Object does not exist")
            else:
                raise Exception('Error: S3 Object does not exist')
        else:
            # Prefix operation: at least one object or common prefix
            # must exist under it.
            if not contents and not common_prefixes:
                raise Exception('Error: S3 Prefix does not exist')

    else:
        src_path = os.path.abspath(src_path)
        if os.path.exists(src_path):
            if os.path.isdir(src_path) and not dir_op:
                raise Exception("Error: Requires a local file")
            elif os.path.isfile(src_path) and dir_op:
                raise Exception("Error: Requires a local directory")
            else:
                pass
        else:
            raise Exception("Error: Local path does not exist")
def create_multipart_upload(self):
    """Start a multipart upload to ``self.dest`` and return its upload id."""
    dest_bucket, dest_key = find_bucket_key(self.dest)
    request = {
        'endpoint': self.endpoint,
        'bucket': dest_bucket,
        'key': dest_key,
    }
    self._handle_object_params(request)
    created, http = operate(self.service, 'CreateMultipartUpload',
                            request)
    return created['UploadId']
def __call__(self):
    """
    Wait for the download context to report completion, then send a
    ``DeleteObject`` request for ``self._filename.src``.
    """
    LOGGER.debug("Waiting for download to finish.")
    self._context.wait_for_completion()
    src_bucket, src_key = find_bucket_key(self._filename.src)
    request = {
        'endpoint': self._filename.endpoint,
        'bucket': src_bucket,
        'key': src_key,
    }
    response_data, http = operate(
        self._filename.service, 'DeleteObject', request)
def _do_command(self, parsed_args, parsed_globals):
    """
    List all buckets, or the objects under the given bucket/key.

    ``parsed_args.paths[0]`` may be given with or without the ``s3://``
    scheme.  The scheme is stripped only when it is actually present, so
    a path without it is no longer truncated by five characters.

    Returns:
        Always 0.
    """
    path = parsed_args.paths[0]
    if path.startswith('s3://'):
        path = path[5:]
    bucket, key = find_bucket_key(path)
    self.service = self._session.get_service('s3')
    self.endpoint = self._get_endpoint(self.service, parsed_globals)
    if not bucket:
        self._list_all_buckets()
    else:
        self._list_all_objects(bucket, key)
    return 0
def check_dest_path(self, destination):
    """
    For ``cp``, ``sync`` and ``mv`` with an ``s3://`` destination,
    verify that the destination bucket exists.
    """
    if not destination.startswith('s3://'):
        return
    if self.cmd not in ['cp', 'sync', 'mv']:
        return
    dest_bucket, _dest_key = find_bucket_key(destination[5:])
    # A bucket is not always provided (like 'aws s3 ls'), so only
    # verify that it exists when there actually is one.
    if dest_bucket:
        self._verify_bucket_exists(dest_bucket)
def prepare_bucket(self, s3_path):
    """
    Make sure the bucket named in ``s3_path`` exists, creating it when
    ``self._check_bucket_exists`` reports that it does not.

    Writes a line to stdout saying whether a new or an existing bucket
    is used, and returns the ``(bucket, key)`` pair parsed from
    ``s3_path``.
    """
    bucket, key = find_bucket_key(s3_path)
    if self._check_bucket_exists(bucket):
        sys.stdout.write('Using existing S3 bucket: %s\n' % bucket)
    else:
        self._create_bucket(bucket)
        sys.stdout.write('Using new S3 bucket: %s\n' % bucket)
    return bucket, key
def download(self):
    """
    Fetch ``self.src`` with a single ``GetObject`` request and save the
    response to ``self.dest`` via ``save_file``.
    """
    src_bucket, src_key = find_bucket_key(self.src)
    request = {
        'endpoint': self.endpoint,
        'bucket': src_bucket,
        'key': src_key,
    }
    fetched, http = operate(self.service, 'GetObject', request)
    save_file(self.dest, fetched, self.last_update)
def set_size_from_s3(self):
    """
    Run ``HeadObject`` on ``self.src`` and store the reported
    ``ContentLength`` in ``self.size`` as an integer.
    """
    src_bucket, src_key = find_bucket_key(self.src)
    head = self.client.head_object(Bucket=src_bucket, Key=src_key)
    self.size = int(head['ContentLength'])
def create_multipart_upload(self):
    """Start a multipart upload to ``self.dest`` and return its upload id."""
    dest_bucket, dest_key = find_bucket_key(self.dest)
    request = {'Bucket': dest_bucket, 'Key': dest_key}
    self._inject_content_type(request)
    RequestParamsMapper.map_create_multipart_upload_params(
        request, self.parameters)
    created = self.client.create_multipart_upload(**request)
    return created['UploadId']
def _handle_upload(self, body):
    """Send ``body`` to ``self.dest`` with a single ``put_object`` call."""
    dest_bucket, dest_key = find_bucket_key(self.dest)
    request = dict(Bucket=dest_bucket, Key=dest_key, Body=body)
    self._handle_object_params(request)
    response_data = self.client.put_object(**request)
def _cancel_upload(self, upload_id, filename):
    """Abort multipart upload ``upload_id`` targeting ``filename.dest``."""
    dest_bucket, dest_key = find_bucket_key(filename.dest)
    abort_kwargs = dict(Bucket=dest_bucket, Key=dest_key,
                        UploadId=upload_id)
    LOGGER.debug("Aborting multipart upload for: %s", dest_key)
    filename.client.abort_multipart_upload(**abort_kwargs)
def __call__(self):
    """
    Copy one byte range of the source object as a multipart-upload part.

    Part numbers are 1-based; the final part ends at the last byte of
    the source.  Success announces the part's ETag to the upload context
    and queues a progress result.  A cancelled upload is only logged;
    any other exception queues an error result and cancels the upload.
    """
    fileinfo = self._filename
    LOGGER.debug("Uploading part copy %s for filename: %s",
                 self._part_number, fileinfo.src)
    source_size = fileinfo.size
    range_start = (self._part_number - 1) * self._chunk_size
    if not self._is_last_part(self._part_number):
        range_end = range_start + self._chunk_size - 1
    else:
        range_end = source_size - 1
    copy_range = "bytes=%s-%s" % (range_start, range_end)
    try:
        LOGGER.debug("Waiting for upload id.")
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(fileinfo.dest)
        src_bucket, src_key = find_bucket_key(fileinfo.src)
        copy_source = "%s/%s" % (src_bucket, src_key)
        params = {
            "endpoint": fileinfo.endpoint,
            "bucket": bucket,
            "key": key,
            "part_number": self._part_number,
            "upload_id": upload_id,
            "copy_source": copy_source,
            "copy_source_range": copy_range,
        }
        response_data, http = operate(fileinfo.service, "UploadPartCopy",
                                      params)
        # Drop the first and last character wrapped around the ETag.
        part_etag = response_data["CopyPartResult"]["ETag"][1:-1]
        self._upload_context.announce_finished_part(
            etag=part_etag, part_number=self._part_number)
        progress = print_operation(fileinfo, 0)
        self._result_queue.put({
            "message": progress,
            "total_parts": self._total_parts(),
            "error": False,
        })
    except UploadCancelledError as e:
        # Already handled elsewhere: whoever cancelled the upload has
        # queued the message for it.
        LOGGER.debug("Not uploading part copy, task has been cancelled.")
    except Exception as e:
        LOGGER.debug("Error during upload part copy: %s", e,
                     exc_info=True)
        failure = print_operation(self._filename, failed=True,
                                  dryrun=False)
        failure += "\n" + str(e)
        self._result_queue.put({"message": failure, "error": True})
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Copy part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def download(self):
    """
    Fetch ``self.src`` with a single ``get_object`` call and save the
    response to ``self.dest`` via ``save_file``.
    """
    src_bucket, src_key = find_bucket_key(self.src)
    fetched = self.client.get_object(Bucket=src_bucket, Key=src_key)
    save_file(self.dest, fetched, self.last_update, self.is_stream)
def make_bucket(self):
    """
    Create the bucket named in ``self.src``.

    A ``CreateBucketConfiguration`` carrying the client's region as the
    ``LocationConstraint`` is sent for every region except
    ``us-east-1``.
    """
    bucket_name, _unused_key = find_bucket_key(self.src)
    request = {'Bucket': bucket_name}
    region = self.client.meta.region_name
    if region != 'us-east-1':
        request['CreateBucketConfiguration'] = {
            'LocationConstraint': region,
        }
    self.client.create_bucket(**request)
def _normalize_s3_trailing_slash(self, paths):
    """
    Append ``/`` in place to every bucket-only ``s3://`` path in
    ``paths`` (``s3://bucket`` -> ``s3://bucket/``).
    """
    for index, candidate in enumerate(paths):
        if not candidate.startswith('s3://'):
            continue
        _bucket, key = find_bucket_key(candidate[5:])
        if key or candidate.endswith('/'):
            # Either a key was given or the slash is already there.
            continue
        paths[index] = candidate + '/'
def __call__(self):
    """
    Upload one part of a multipart upload.

    The upload id is obtained from the upload context, the part body is
    read with ``self._read_part()`` and sent with ``UploadPart``.  On
    success the part's ETag is announced to the upload context and a
    progress result is queued.  A cancelled upload is only logged; any
    other exception queues an error result and cancels the upload.
    """
    LOGGER.debug("Uploading part %s for filename: %s",
                 self._part_number, self._filename.src)
    try:
        LOGGER.debug("Waiting for upload id.")
        upload_id = self._upload_context.wait_for_upload_id()
        bucket, key = find_bucket_key(self._filename.dest)
        # Round up so a trailing partial chunk counts as a part.
        total = int(
            math.ceil(self._filename.size / float(self._chunk_size)))
        body = self._read_part()
        params = {
            'endpoint': self._filename.endpoint,
            'bucket': bucket, 'key': key,
            'part_number': self._part_number,
            'upload_id': upload_id,
            'body': body
        }
        # NOTE(review): only the request is assumed to sit inside the
        # inner ``try``; the flattened source does not show where the
        # ``finally`` block ends -- confirm.
        try:
            response_data, http = operate(self._filename.service,
                                          'UploadPart', params)
        finally:
            # Close the part body whether or not the request succeeded.
            body.close()
        # Drop the first and last character wrapped around the ETag.
        etag = response_data['ETag'][1:-1]
        self._upload_context.announce_finished_part(
            etag=etag, part_number=self._part_number)

        message = print_operation(self._filename, 0)
        result = {'message': message, 'total_parts': total,
                  'error': False}
        self._result_queue.put(result)
    except UploadCancelledError as e:
        # We don't need to do anything in this case.  The task
        # has been cancelled, and the task that cancelled the
        # task has already queued a message.
        LOGGER.debug("Not uploading part, task has been cancelled.")
    except Exception as e:
        LOGGER.debug('Error during part upload: %s', e,
                     exc_info=True)
        message = print_operation(self._filename, failed=True,
                                  dryrun=False)
        message += '\n' + str(e)
        result = {'message': message, 'error': True}
        self._result_queue.put(result)
        self._upload_context.cancel_upload()
    else:
        LOGGER.debug("Part number %s completed for filename: %s",
                     self._part_number, self._filename.src)
def _download_part(self):
    """
    Download this task's byte range, retrying transient failures.

    The part number is 0-based.  The number of parts is
    ``int(size / chunk_size)``; the last part uses an open-ended range
    so it also picks up any remainder.  Up to ``self.TOTAL_ATTEMPTS``
    ``get_object`` calls are made; a timeout/socket error or an
    incomplete read triggers the next attempt.  On success the body is
    passed to ``self._queue_writes``, the part is announced to the
    download context and a ``PrintTask`` is queued.

    Raises:
        RetriesExeededError: when every attempt failed with one of the
            retried errors.
    """
    total_file_size = self._filename.size
    # Loop-invariant, so computed once up front.
    total_parts = int(total_file_size / self._chunk_size)
    start_range = self._part_number * self._chunk_size
    if self._part_number == total_parts - 1:
        # Open-ended range: read through to the end of the object.
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                 self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {'Bucket': bucket, 'Key': key, 'Range': range_param}
    RequestParamsMapper.map_get_object_params(params, self._params)
    # Attempts are numbered from 1 so the log reads "attempt 1 / N"
    # through "attempt N / N" rather than starting at 0.
    for attempt in range(1, self.TOTAL_ATTEMPTS + 1):
        try:
            LOGGER.debug("Making GetObject requests with byte range: %s",
                         range_param)
            response_data = self._client.get_object(**params)
            LOGGER.debug("Response received from GetObject")
            body = response_data['Body']
            self._queue_writes(body)
            self._context.announce_completed_part(self._part_number)

            message = print_operation(self._filename, 0)
            result = {'message': message, 'error': False,
                      'total_parts': total_parts}
            self._result_queue.put(PrintTask(**result))
            LOGGER.debug("Task complete: %s", self)
            return
        except (socket.timeout, socket.error, ReadTimeoutError) as e:
            LOGGER.debug("Timeout error caught, retrying request, "
                         "(attempt %s / %s)", attempt,
                         self.TOTAL_ATTEMPTS, exc_info=True)
            continue
        except IncompleteReadError as e:
            LOGGER.debug("Incomplete read detected: %s, (attempt %s / %s)",
                         e, attempt, self.TOTAL_ATTEMPTS)
            continue
    raise RetriesExeededError("Maximum number of attempts exceeded: %s" %
                              self.TOTAL_ATTEMPTS)
def __call__(self):
    """
    Take one part description off ``self.part_queue`` and download it.

    ``part_info`` is indexed as ``(filename, part_number, size_uploads)``
    with a 0-based part number.  The byte range is fetched with
    ``GetObject`` and written into ``self.f`` at the part's offset while
    holding ``self.write_lock``.  On a ``requests.ConnectionError`` the
    part is put back on the queue and this task is resubmitted; any
    other exception is only logged at debug level.  If the queue yields
    nothing within ``QUEUE_TIMEOUT_GET`` the call does nothing.
    """
    try:
        part_info = self.part_queue.get(True, QUEUE_TIMEOUT_GET)
        # Track the number of parts currently being processed.
        with self.counter_lock:
            self.part_counter.count += 1
        filename = part_info[0]
        part_number = part_info[1]
        size_uploads = part_info[2]
        last_part_number = int(filename.size / size_uploads) - 1
        beginning_range = part_number*size_uploads
        str_range = "bytes="
        if part_number == last_part_number:
            # Open-ended range: read through to the end of the object.
            str_range += str(beginning_range) + "-"
        else:
            end_range = beginning_range + size_uploads - 1
            str_range += str(beginning_range) + "-" + str(end_range)
        bucket, key = find_bucket_key(filename.src)
        try:
            params = {'endpoint': self.endpoint, 'bucket': bucket,
                      'key': key, 'range': str_range}
            response_data, http = operate(self.service, 'GetObject',
                                          params)
            body = response_data['Body'].read()
            # Seek and write under the lock so the two stay paired.
            with self.write_lock:
                self.f.seek(part_number*size_uploads)
                self.f.write(body)

            print_str = print_operation(filename, 0)
            print_result = {'result': print_str}
            part_str = {'total': int(filename.size / size_uploads)}
            print_result['part'] = part_str
            self.printQueue.put(print_result)
            self.dest_queue.put(part_number)
        except requests.ConnectionError as e:
            connect_error = str(e)
            LOGGER.debug("%s part download failure: %s" %
                         (part_info[0].src, connect_error))
            # Requeue the part and resubmit this task to retry it.
            self.part_queue.put(part_info)
            self.executer.submit(self)
        except Exception as e:
            LOGGER.debug('%s' % str(e))
        # NOTE(review): ``task_done`` and the counter decrement are
        # assumed to run after the inner ``try`` on every path; the
        # flattened source does not show the nesting -- confirm.
        self.part_queue.task_done()
        with self.counter_lock:
            self.part_counter.count -= 1
    except Queue.Empty:
        pass
def upload(self):
    """
    Send the local file ``self.src`` to ``self.dest`` with a single
    ``PutObject`` request, then pass the returned ETag and the rewound
    file to ``check_etag``.
    """
    with open(self.src, 'rb') as source_file:
        dest_bucket, dest_key = find_bucket_key(self.dest)
        request = dict(
            endpoint=self.endpoint,
            bucket=dest_bucket,
            key=dest_key,
            body=source_file,
        )
        self._handle_object_params(request)
        put_response, http = operate(self.service, 'PutObject', request)
        # Drop the first and last character wrapped around the ETag.
        returned_etag = put_response['ETag'][1:-1]
        # Rewind so ``check_etag`` reads the file from the start.
        source_file.seek(0)
        check_etag(returned_etag, source_file)
def _handle_rm_force(self, parsed_globals, parameters):
    """
    Recursively delete the objects in a bucket when the ``force``
    parameter was given to the remove-bucket command.

    Does nothing when ``force`` is absent or falsy.  The path in
    ``parameters['src']`` must name a bucket only: a path that also
    contains a key is rejected instead of silently dropping the key and
    emptying the whole bucket.

    Raises:
        ValueError: if the path contains a key.
    """
    # XXX: This shouldn't really be here.  This was originally moved from
    # the CommandParameters class to here, but this is still not the ideal
    # place for this code.  This should be moved
    # to either the CommandArchitecture class, or the RbCommand class where
    # the actual operations against S3 are performed.  This may require
    # some refactoring though to move this to either of those classes.
    # For now, moving this out of CommandParameters allows for that class
    # to be kept simple.
    if 'force' in parameters:
        if parameters['force']:
            bucket, key = find_bucket_key(parameters['src'][5:])
            if key:
                raise ValueError('Please specify a valid bucket name only.'
                                 ' E.g. s3://%s' % bucket)
            path = 's3://' + bucket
            del_objects = RmCommand(self._session)
            del_objects([path, '--recursive'], parsed_globals)
def __call__(self):
    """
    Download one byte range of ``self._filename.src`` and write it out.

    The part number is 0-based.  The number of parts is
    ``int(size / chunk_size)``; the last part uses an open-ended range
    (``bytes=<start>-``) so it also picks up any remainder.  On success
    the completed part is announced to the download context and a
    progress result is put on ``self._result_queue``.

    Raises:
        Exception: whatever the ``GetObject`` call or the write raised,
            re-raised unchanged after the download context has been
            cancelled.
    """
    total_file_size = self._filename.size
    total_parts = int(total_file_size / self._chunk_size)
    start_range = self._part_number * self._chunk_size
    if self._part_number == total_parts - 1:
        # Open-ended range: read through to the end of the object.
        end_range = ''
    else:
        end_range = start_range + self._chunk_size - 1
    range_param = 'bytes=%s-%s' % (start_range, end_range)
    LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                 self._filename.dest)
    bucket, key = find_bucket_key(self._filename.src)
    params = {
        'endpoint': self._filename.endpoint,
        'bucket': bucket,
        'key': key,
        'range': range_param
    }
    try:
        LOGGER.debug("Making GetObject requests with byte range: %s",
                     range_param)
        response_data, http = operate(self._service, 'GetObject', params)
        LOGGER.debug("Response received from GetObject")
        body = response_data['Body']
        self._write_to_file(body)
        self._context.announce_completed_part(self._part_number)
        message = print_operation(self._filename, 0)
        result = {
            'message': message,
            'error': False,
            'total_parts': total_parts
        }
        self._result_queue.put(result)
    except Exception as e:
        LOGGER.debug('Exception caught downloading byte range: %s', e,
                     exc_info=True)
        self._context.cancel()
        # Bare ``raise`` re-raises with the original traceback intact;
        # ``raise e`` would restart it from this line on Python 2.
        raise
def upload(self):
    """
    Upload ``self.src`` to ``self.dest``.

    When ``self.is_multi`` is set the work goes to ``multi_upload``.
    Otherwise the whole file is read and sent with a single
    ``PutObject`` request, and the ETag from the HTTP response is passed
    to ``check_etag`` together with the data.
    """
    if self.is_multi:
        self.multi_upload()
        return

    data = read_file(self.src)
    dest_bucket, dest_key = find_bucket_key(self.dest)
    # Python 2.6 gets a StringIO wrapper; everything else a bytearray.
    on_py26 = sys.version_info[:2] == (2, 6)
    stream_body = StringIO(data) if on_py26 else bytearray(data)
    request = {
        'endpoint': self.endpoint,
        'bucket': dest_bucket,
        'key': dest_key,
    }
    if data:
        # An empty file is sent without any body at all.
        request['body'] = stream_body
    if self.parameters['acl']:
        request['acl'] = self.parameters['acl'][0]
    response_data, http = operate(self.service, 'PutObject', request)
    check_etag(retrieve_http_etag(http), data)
def list_objects(self, s3_path, dir_op): """ This function yields the appropriate object or objects under a common prefix depending if the operation is on objects under a common prefix. It yields the file's source path, size, and last update. """ # Short circuit path: if we are not recursing into the s3 # bucket and a specific path was given, we can just yield # that path and not have to call any operation in s3. bucket, prefix = find_bucket_key(s3_path) if not dir_op and prefix: yield self._list_single_object(s3_path) else: operation = self._service.get_operation('ListObjects') iterator = operation.paginate(self._endpoint, bucket=bucket, prefix=prefix) for html_response, response_data in iterator: contents = response_data['Contents'] for content in contents: src_path = bucket + '/' + content['Key'] size = content['Size'] last_update = parse(content['LastModified']) last_update = last_update.astimezone(tzlocal()) if size == 0 and src_path.endswith('/'): if self.operation_name == 'delete': # This is to filter out manually created folders # in S3. They have a size zero and would be # undesirably downloaded. Local directories # are automatically created when they do not # exist locally. But user should be able to # delete them. yield src_path, size, last_update elif not dir_op and s3_path != src_path: pass else: yield src_path, size, last_update
def _handle_rm_force(self, parsed_globals, parameters): """ This function recursively deletes objects in a bucket if the force parameter was thrown when using the remove bucket command. It will refuse to delete if a key is specified in the s3path. """ # XXX: This shouldn't really be here. This was originally moved from # the CommandParameters class to here, but this is still not the ideal # place for this code. This should be moved # to either the CommandArchitecture class, or the RbCommand class where # the actual operations against S3 are performed. This may require # some refactoring though to move this to either of those classes. # For now, moving this out of CommandParameters allows for that class # to be kept simple. if 'force' in parameters: if parameters['force']: bucket, key = find_bucket_key(parameters['src'][5:]) if key: raise ValueError('Please specify a valid bucket name only.' ' E.g. s3://%s' % bucket) path = 's3://' + bucket del_objects = RmCommand(self._session) del_objects([path, '--recursive'], parsed_globals)
def test_bucket(self):
    # A path without any '/' is all bucket; the key comes back empty.
    s3_path = 'bucket'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(parsed_bucket, 'bucket')
    self.assertEqual(parsed_key, '')
def multi_upload(self):
    """
    Upload the source file to s3 as a multipart upload.

    The upload is started with ``CreateMultipartUpload``.  For every part
    an entry is placed on ``part_queue`` -- the queue that tracks the
    progress of the upload -- and an ``UploadPartTask`` is submitted to
    the ``executer``.  This function then blocks until the parts are done
    and either completes the multipart upload or, when not every part
    was reported back, aborts it and raises.

    Because this method waits on its parts, threads are required to
    process them for this function to return.

    :raises Exception: if the number of finished parts differs from the
        number of parts queued; the upload is aborted first.
    """
    part_queue = NoBlockQueue(self.interrupt)
    complete_upload_queue = Queue.PriorityQueue()
    part_counter = MultiCounter()
    counter_lock = threading.Lock()

    bucket, key = find_bucket_key(self.dest)
    create_params = {'endpoint': self.endpoint, 'bucket': bucket,
                     'key': key}
    acl = self.parameters['acl']
    if acl:
        create_params['acl'] = acl[0]
    if self.parameters['guess_mime_type']:
        self._inject_content_type(create_params, self.src)
    response_data, _http = operate(self.service, 'CreateMultipartUpload',
                                   create_params)
    upload_id = response_data['UploadId']

    part_size = self.chunksize
    num_uploads = int(math.ceil(self.size / float(part_size)))
    # Part numbers start at 1; each part gets its own queue entry and
    # its own task.
    for part_number in range(1, num_uploads + 1):
        part_queue.put((self, upload_id, part_number, part_size))
        self.executer.submit(
            UploadPartTask(session=self.session, executer=self.executer,
                           part_queue=part_queue,
                           dest_queue=complete_upload_queue,
                           region=self.region,
                           printQueue=self.printQueue,
                           interrupt=self.interrupt,
                           part_counter=part_counter,
                           counter_lock=counter_lock))
    part_queue.join()

    # Make sure every in-flight part upload has finished before the
    # upload is aborted or completed.  This only really matters when an
    # interrupt signal was sent; otherwise ``part_queue.join()`` above
    # already guarantees it.
    while part_counter.count:
        time.sleep(0.1)

    # Each queue entry carries the part description in its second slot.
    parts_list = []
    while not complete_upload_queue.empty():
        parts_list.append(complete_upload_queue.get()[1])

    finish_params = {
        'endpoint': self.endpoint,
        'bucket': bucket,
        'key': key,
        'upload_id': upload_id,
    }
    if len(parts_list) != num_uploads:
        operate(self.service, 'AbortMultipartUpload', finish_params)
        raise Exception()
    finish_params['multipart_upload'] = {'Parts': parts_list}
    operate(self.service, 'CompleteMultipartUpload', finish_params)
def __call__(self):
    """
    Delete the source object once the download has completed.

    Blocks on the context's ``wait_for_completion`` and then issues a
    ``delete_object`` call, through the file's source client, for the
    bucket and key parsed from ``self._filename.src``.
    """
    LOGGER.debug("Waiting for download to finish.")
    self._context.wait_for_completion()
    src_bucket, src_key = find_bucket_key(self._filename.src)
    self._filename.source_client.delete_object(
        Bucket=src_bucket, Key=src_key)
def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
    """
    Ask the transfer manager to delete the object at ``fileinfo.src``.

    :returns: whatever the transfer manager's ``delete`` call returns.
    """
    src_bucket, src_key = find_bucket_key(fileinfo.src)
    delete_kwargs = {
        'bucket': src_bucket,
        'key': src_key,
        'extra_args': extra_args,
        'subscribers': subscribers,
    }
    return self._transfer_manager.delete(**delete_kwargs)
def prepare_bucket(self, s3_path):
    """
    Make sure the bucket named in ``s3_path`` exists.

    The bucket is created through ``_create_bucket`` when
    ``_check_bucket_exists`` reports that it is missing.

    :returns: the ``(bucket, key)`` pair parsed from ``s3_path``.
    """
    bucket, key = find_bucket_key(s3_path)
    if not self._check_bucket_exists(bucket):
        self._create_bucket(bucket)
    return bucket, key
def test_accesspoint_arn_with_key_and_prefix(self):
    # Everything after the access point name belongs to the key.
    s3_path = 'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint/pre/key'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(
        parsed_bucket,
        'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint')
    self.assertEqual(parsed_key, 'pre/key')
def test_accesspoint_arn_with_slash(self):
    # A trailing '/' after the access point name yields an empty key.
    s3_path = 'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint/'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(
        parsed_bucket,
        'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint')
    self.assertEqual(parsed_key, '')
def test_bucket_with_key_and_prefix(self):
    # Only the first '/' separates bucket from key; later ones stay in
    # the key.
    s3_path = 'bucket/prefix/key'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(parsed_bucket, 'bucket')
    self.assertEqual(parsed_key, 'prefix/key')
def test_bucket_with_key(self):
    # The simplest bucket/key path splits into its two halves.
    s3_path = 'bucket/key'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(parsed_bucket, 'bucket')
    self.assertEqual(parsed_key, 'key')
def test_unicode(self):
    # Bucket and key written with \u escapes are split at the '/'
    # separator like any other path.
    path = '\u1234' + u'/' + '\u5678'
    parsed_bucket, parsed_key = find_bucket_key(path)
    self.assertEqual(parsed_bucket, '\u1234')
    self.assertEqual(parsed_key, '\u5678')
def remove_bucket(self):
    """
    Delete the bucket named in ``self.src``.

    Only the bucket portion of the path is used; any key portion is
    ignored.
    """
    bucket_name, _unused_key = find_bucket_key(self.src)
    self.client.delete_bucket(Bucket=bucket_name)
def test_bucket_with_slash(self):
    # A trailing '/' after the bucket name yields an empty key.
    s3_path = 'bucket/'
    parsed_bucket, parsed_key = find_bucket_key(s3_path)
    self.assertEqual(parsed_bucket, 'bucket')
    self.assertEqual(parsed_key, '')