# Imports assumed by this snippet and download_artifact below; the helpers
# short_artifact_path and __remove_prefix are defined elsewhere in the module.
import os
from pathlib import Path

from boto3.s3 import transfer
from tqdm import tqdm


def upload_artifact(client, artifacts_s3_bucket, artifact_path, local_dir):
    # First pass: sum the file sizes so the progress bar has a total.
    total_size = 0
    for root, sub_dirs, files in os.walk(local_dir):
        for filename in files:
            file_path = os.path.join(root, filename)
            file_size = os.path.getsize(file_path)
            total_size += file_size
    uploader = transfer.S3Transfer(client, transfer.TransferConfig(), transfer.OSUtils())
    with tqdm(total=total_size, unit='B', unit_scale=True, unit_divisor=1024,
              desc=f"Uploading artifact '{short_artifact_path(artifact_path)}'") as pbar:

        def callback(size):
            pbar.update(size)

        # Second pass: upload each file under a key rooted at artifact_path.
        for root, sub_dirs, files in os.walk(local_dir):
            for filename in files:
                file_path = os.path.join(root, filename)
                key = artifact_path + __remove_prefix(
                    str(file_path), str(Path(local_dir).absolute()))
                uploader.upload_file(
                    str(file_path),
                    artifacts_s3_bucket,
                    key,
                    callback=callback,
                )
def download_artifact(client, artifacts_s3_bucket, artifact_path, output_dir=None):
    output_path = Path(output_dir if output_dir is not None else os.getcwd())
    response = client.list_objects(Bucket=artifacts_s3_bucket, Prefix=artifact_path)
    total_size = 0
    keys = []
    etags = []
    for obj in response.get("Contents") or []:
        key = obj["Key"]
        etag = obj["ETag"]
        dest_path = dest_file_path(key, output_path)
        # Re-download only when the cached ETag no longer matches.
        if dest_path.exists():
            etag_path = etag_file_path(key, output_path)
            if etag_path.exists():
                if etag_path.read_text() != etag:
                    os.remove(etag_path)
                    os.remove(dest_path)
                else:
                    continue
        total_size += obj["Size"]
        if obj["Size"] > 0 and not key.endswith("/"):
            # Skip empty files that designate folders (required by FUSE)
            keys.append(key)
            etags.append(etag)
    downloader = transfer.S3Transfer(client, transfer.TransferConfig(), transfer.OSUtils())
    # TODO: Download files in parallel
    with tqdm(total=total_size, unit='B', unit_scale=True, unit_divisor=1024,
              desc=f"Downloading artifact '{short_artifact_path(artifact_path)}'") as pbar:
        for key, etag in zip(keys, etags):

            def callback(size):
                pbar.update(size)

            file_path = dest_file_path(key, output_path)
            file_path.parent.mkdir(parents=True, exist_ok=True)
            downloader.download_file(artifacts_s3_bucket, key, str(file_path),
                                     callback=callback)
            # Record the object's ETag so unchanged files are skipped next time.
            etag_path = Path(etag_file_path(key, output_path))
            etag_path.parent.mkdir(parents=True, exist_ok=True)
            etag_path.write_text(etag)
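# A hedged usage sketch for the upload/download pair above; the bucket name
# and paths are illustrative assumptions, and the module-level helpers
# (short_artifact_path, __remove_prefix, dest_file_path, etag_file_path) are
# assumed to be defined alongside these functions.
if __name__ == "__main__":
    import boto3

    client = boto3.client("s3")
    upload_artifact(client, "my-artifacts-bucket", "runs/42/model/", "./model_out")
    download_artifact(client, "my-artifacts-bucket", "runs/42/model/",
                      output_dir="./restored")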
def __init__(self, s3_client, bucket_name, prefix=None, kms_key_id=None, force_upload=False):
    self.s3 = s3_client
    self.bucket_name = bucket_name
    self.prefix = prefix
    self.kms_key_id = kms_key_id or None
    self.force_upload = force_upload
    self.transfer_manager = transfer.create_transfer_manager(self.s3, transfer.TransferConfig())
    self._artifact_metadata = None
# `tfr` is assumed to be `from boto3.s3 import transfer as tfr` in this and
# the following snippet.
def read_aws_boto(client, bucket_name, bucket_path, dest_file):
    """Read an S3 object to a local file via the boto transfer interface (multipart)."""
    config = tfr.TransferConfig(
        multipart_threshold=2 * 1024 * 1024,
        max_concurrency=10,
        num_download_attempts=10,
    )
    transfer = tfr.S3Transfer(client, config)
    transfer.download_file(bucket_name, bucket_path, dest_file)
def write_aws_boto(client, bucket_name, bucket_path, source_file):
    """Write a local file to S3 via the boto transfer interface (multipart)."""
    config = tfr.TransferConfig(
        multipart_threshold=2 * 1024 * 1024,
        max_concurrency=10,
        num_download_attempts=10,
    )
    transfer = tfr.S3Transfer(client, config)
    transfer.upload_file(source_file, bucket_name, bucket_path,
                         callback=ProgressPercentage(source_file))
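# `ProgressPercentage` is referenced above but not defined in these snippets;
# a minimal sketch modeled on the callback example in the boto3 docs (the
# original class may differ):
import os
import sys
import threading


class ProgressPercentage:
    """Thread-safe transfer callback that prints percent complete."""

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # Transfer callbacks can fire from multiple threads, so serialize updates.
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(
                f"\r{self._filename}  {self._seen_so_far:.0f} / {self._size:.0f}"
                f"  ({percentage:.2f}%)")
            sys.stdout.flush()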
import time

import boto3
from boto3.s3 import transfer


def upload_files(mirror, all_tarballs):
    upload_config = transfer.TransferConfig(max_concurrency=10, use_threads=True)
    s3 = boto3.client('s3')
    # Group this month's uploads under a YYYY_MM key prefix.
    monthly_directory = time.strftime("%Y_%m")
    for file_name in all_tarballs:
        print(f'Uploading {file_name} to S3')
        s3.upload_file(file_name, mirror.aws_bucket,
                       f'{monthly_directory}/{file_name}',
                       ExtraArgs={'ACL': 'public-read'},
                       Config=upload_config)
def get_s3_transfer(self):
    logger.info("Init s3 transfer {url}".format(url=self.endpoint_url))
    s3_config = transfer.TransferConfig(multipart_threshold=10 * TB,
                                        max_concurrency=10,
                                        multipart_chunksize=1 * TB,
                                        num_download_attempts=5,
                                        max_io_queue=100,
                                        io_chunksize=256 * KB,
                                        use_threads=True)
    return transfer.S3Transfer(self.s3_client, s3_config)
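# `KB` and `TB` above are assumed to be module-level byte-size constants; a
# plausible definition (an assumption, not from the source). Note that a
# 10 TB multipart threshold effectively disables multipart transfers for
# ordinary object sizes.
KB = 1024
MB = KB ** 2
GB = KB ** 3
TB = KB ** 4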
import logging
from pathlib import Path
from typing import List

import boto3
from boto3.s3 import transfer

_logger = logging.getLogger(__name__)


def upload_files(aws_bucket: str, monthly_directory: str, all_tarballs: List[Path]):
    upload_config = transfer.TransferConfig(
        max_concurrency=10,
        use_threads=True
    )
    s3 = boto3.client('s3')
    for tar in all_tarballs:
        name = tar.parts[-1]
        s3.upload_file(
            tar.absolute().as_posix(),
            aws_bucket,
            f'{monthly_directory}/{name}',
            ExtraArgs={'ACL': 'public-read'},
            Config=upload_config
        )
        _logger.info(f'uploaded {tar} to {aws_bucket}')
def __init__(
    self,
    s3_client: Any,
    bucket_name: str,
    prefix: Optional[str] = None,
    kms_key_id: Optional[str] = None,
    force_upload: bool = False,
    no_progressbar: bool = False,
):
    self.s3 = s3_client
    self.bucket_name = bucket_name
    self.prefix = prefix
    self.kms_key_id = kms_key_id or None
    self.force_upload = force_upload
    self.no_progressbar = no_progressbar
    self.transfer_manager = transfer.create_transfer_manager(
        self.s3, transfer.TransferConfig())
    self._artifact_metadata = None
def transfer_config_generator(self,
                              multipart_threshold: int = None,
                              max_concurrency: int = None,
                              multipart_chunksize: int = None,
                              num_download_attempts: int = None,
                              max_io_queue: int = None,
                              io_chunksize: int = None,
                              use_threads: bool = None):
    try:
        # Note: `or` falls back to the class default for any falsy argument,
        # so explicitly passing 0 or False is treated the same as omitting it.
        return transfer.TransferConfig(
            multipart_threshold=multipart_threshold or self.TRANSFER_MULTIPART_THRESHOLD,
            max_concurrency=max_concurrency or self.TRANSFER_MAX_CONCURRENCY,
            multipart_chunksize=multipart_chunksize or self.TRANSFER_MULTIPART_CHUNKSIZE,
            num_download_attempts=num_download_attempts or self.TRANSFER_NUM_DOWNLOAD_ATTEMPTS,
            max_io_queue=max_io_queue or self.TRANSFER_MAX_IO_QUEUE,
            io_chunksize=io_chunksize or self.TRANSFER_IO_CHUNKSIZE,
            use_threads=use_threads or self.TRANSFER_USE_THREADS)
    except Exception as e:
        return self._exception_handler(e)
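# Because `or` discards falsy overrides, callers cannot request
# use_threads=False or multipart_threshold=0 through the generator above. If
# that matters, an explicit None check avoids the pitfall; a sketch (the
# helper name is hypothetical):
def _pick(value, default):
    """Return `value` unless it is None, preserving falsy overrides."""
    return default if value is None else value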
# `s3transfer` in this and the following snippet is assumed to be the boto3
# wrapper module, `import boto3.s3.transfer as s3transfer`, which provides
# create_transfer_manager and ProgressCallbackInvoker.
import os

import boto3
import botocore.config
import tqdm
from botocore.exceptions import ClientError


def _upload_file(source_path: str, bucket: str, target_path: str,
                 storage_class: StorageClass) -> bool:
    logger.debug(f'Uploading {source_path} to {target_path}...')
    botocore_config = botocore.config.Config(max_pool_connections=CONNECTIONS)
    s3 = boto3.client('s3', config=botocore_config)
    transfer_config = s3transfer.TransferConfig(
        use_threads=True,
        max_concurrency=CONNECTIONS,
    )
    s3t = s3transfer.create_transfer_manager(s3, transfer_config)
    total_size = os.path.getsize(source_path)
    progress = tqdm.tqdm(
        desc='upload',
        total=total_size,
        unit='B',
        unit_scale=1,
        position=0,
        bar_format='{desc:<10}{percentage:3.0f}%|{bar:10}{r_bar}')
    # The returned future resolves once the (possibly multipart) upload finishes.
    future = s3t.upload(
        source_path,
        bucket,
        target_path,
        extra_args={
            'ServerSideEncryption': 'AES256',
            'StorageClass': storage_class.name
        },
        subscribers=[s3transfer.ProgressCallbackInvoker(progress.update)])
    try:
        future.result()
    except ClientError as e:
        logger.error(f'Upload failed for {source_path} to {target_path}: {e}')
        return False
    finally:
        s3t.shutdown()
        progress.close()
    return True
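# `CONNECTIONS`, `logger`, and `StorageClass` are referenced above but not
# defined in the snippet; plausible stand-ins (assumptions, not from the
# source) that would precede _upload_file in a real module. The enum member
# names mirror valid S3 StorageClass API values so that `storage_class.name`
# can be passed through directly.
import enum
import logging

CONNECTIONS = 20
logger = logging.getLogger(__name__)


class StorageClass(enum.Enum):
    STANDARD = enum.auto()
    STANDARD_IA = enum.auto()
    GLACIER = enum.auto()
    DEEP_ARCHIVE = enum.auto()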
def create_s3t():
    s3_client = create_s3_client()
    # Size the transfer thread pool to match the client's connection pool so
    # worker threads are never starved for connections.
    transfer_config = s3transfer.TransferConfig(
        use_threads=True,
        max_concurrency=s3_client.meta.config.max_pool_connections
    )
    return s3transfer.create_transfer_manager(s3_client, config=transfer_config)
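# A hedged usage sketch for create_s3t above; `create_s3_client` is assumed
# to return a configured boto3 S3 client, and the bucket/key names are
# illustrative:
s3t = create_s3t()
try:
    future = s3t.upload('backup.tar.gz', 'my-bucket', 'backups/backup.tar.gz')
    future.result()  # block until the transfer completes
finally:
    s3t.shutdown()   # drain pending transfers and stop worker threads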