def __init__(self, *args, **kwargs):
    MarketRecorder.__init__(self, *args, **kwargs)
    self._bucket = self.context["bucket"]
    self._data_type = self.context.get("data_type", "marketdata")
    self._default_event_type_id = self.context.get("default_event_type_id", "7")
    self.s3 = boto3.client("s3")
    transfer_config = TransferConfig(use_threads=False)
    self.transfer = S3Transfer(self.s3, config=transfer_config)
def __init__(self, directory, access_key=None, secret_key=None):
    self.s3 = boto3.client(
        's3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    transfer_config = TransferConfig(use_threads=False)
    self.transfer = S3Transfer(self.s3, config=transfer_config)
    super(S3, self).__init__(directory)
def default(cls) -> TransferConfig:
    """Build the default TransferConfig from the values in the CONFIG section."""
    section = CONFIG[cls.config_entry]
    return TransferConfig(
        max_io_queue=section['max_io_queue@int'],
        max_concurrency=section['max_concurrency@int'],
        io_chunksize=section['io_chunksize@int'],
        multipart_chunksize=section['multipart_chunksize@int'],
    )
def upload_file(file, bucket_name, object_name):
    # f = file.read()
    # s3 = boto3.client('s3')
    # s3.upload_fileobj(file, bucket_name, object_name)
    config = TransferConfig()
    config.use_threads = False
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    bucket.upload_fileobj(file, object_name, Config=config)
def execute(self):
    """Executes the fetch operation. This is different to the DB API as it
    returns an iterable (all operations return a CSV file). Of course we
    could model that API more precisely in future.

    :return: An iterable of the records fetched
    """
    print("Executing Pandas cursor!")
    self.timer.start()
    config = TransferConfig(multipart_chunksize=8 * MB, multipart_threshold=8 * MB)
    try:
        h1 = http.client.HTTPConnection(FILTER_IP)
        print("Connected")
        if self.input is Format.CSV:
            request_body = json.dumps({"query": self.s3sql, "input": "CSV"})
        elif self.input is Format.PARQUET:
            request_body = json.dumps({"query": self.s3sql, "input": "PARQUET"})
        else:
            raise Exception("Unrecognised InputType {}".format(self.input))
        h1.request('POST', '/' + S3_BUCKET_NAME + '/' + self.s3key, body=request_body)
        print("Made request!")
        r = h1.getresponse()
        r2 = r.read()
        self.table_data = io.BytesIO()
        self.table_data.write(r2)
        print(self.table_data.getvalue().decode('utf-8'))
        self.num_http_get_requests = PandasCursor.calculate_num_http_requests(
            self.table_data, config)
        return self.parse_file()
    except Exception as e:
        print(e)
def download_fileobj(self, key):
    buf = BytesIO()
    self.bucket.download_fileobj(
        Key=key,
        Fileobj=buf,
        Config=TransferConfig(max_io_queue=self.transfer_config['max_io_queue'])
    )
    buf.seek(0)
    return buf
def downloader(self):
    """Singleton S3 "downloader" for all downloads.

    All concurrent downloads reuse the same thread pool. This caps the
    number of threads S3 uses.
    """
    if self._downloader is None:
        self._downloader = S3Transfer(self.client, TransferConfig())
    return self._downloader
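# Hedged usage sketch, not part of the original source: it assumes `downloader`
# above is wrapped as a @property and that `self.client` is a boto3 S3 client.
# Every call then reuses the one cached S3Transfer, so all downloads share a
# single bounded thread pool. The method and argument names are illustrative.
def fetch(self, bucket, key, dest_path):
    # S3Transfer.download_file(bucket, key, filename) is the s3transfer API.
    self.downloader.download_file(bucket, key, dest_path)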
def multi_part_upload_with_s3(filename=None, key_path=None, bucket=None, upload_type="single"):
    start_time = default_timer()
    if bucket == "-hk":
        s3 = boto3.resource('s3', region_name="ap-east-1")
    else:
        s3 = boto3.resource('s3')  # default region, e.g. ap-northeast-2

    if upload_type == "single":
        # Single-part upload: the threshold is set high enough that the file never goes multipart.
        # s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
        config = TransferConfig(multipart_threshold=838860800,
                                max_concurrency=10,
                                multipart_chunksize=8388608,
                                num_download_attempts=5,
                                max_io_queue=100,
                                io_chunksize=262144,
                                use_threads=True)
    elif upload_type == "multi":
        # Multipart mode. Note: with the AWS S3 CLI, anonymous users cannot initiate multipart uploads.
        config = TransferConfig(multipart_threshold=1024 * 25,
                                max_concurrency=10,
                                multipart_chunksize=1024 * 25,
                                use_threads=True)
    else:
        CPrint(f"Unknown upload_type -> {upload_type}", "red")
        raise SystemExit()  # config would otherwise be undefined below

    if filename is None:
        CPrint("[ERROR] filename is None", "red")
        raise SystemExit()
    if key_path is None:
        key_path = filename

    try:
        s3.meta.client.upload_file(filename, bucket, key_path,
                                   # ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/pdf'},
                                   Config=config,
                                   Callback=ProgressPercentage(filename))
    except Exception as e:
        e = str(e).replace(":", ":\n")
        CPrint(f"\n[ERROR] File upload fail / cause -> {e}\n", "red")
        sys.exit(1)

    elapsed = default_timer() - start_time
    time_completed_at = "{:5.3f}s".format(elapsed)
    print(f"\n\t Upload is completed -> {filename} / {time_completed_at}")
def dump_artifacts():
    """Dump all test campaign artifacts from the S3 repository.

    It allows collecting all the artifacts from the S3 repository.

    It could be overridden if the common implementation is not suitable.

    The credentials must be configured before publishing the artifacts:

        * fill ~/.aws/credentials or ~/.boto,
        * set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY in env.

    The next vars must be set in env:

        * S3_ENDPOINT_URL (http://127.0.0.1:9000),
        * S3_DST_URL (s3://xtesting/prefix),

    Returns:
        Campaign.EX_OK if artifacts were collected from the repository.

        Campaign.EX_DUMP_ARTIFACTS_ERROR otherwise.
    """
    try:
        build_tag = env.get('BUILD_TAG')
        b3resource = boto3.resource(
            's3', endpoint_url=os.environ["S3_ENDPOINT_URL"])
        dst_s3_url = os.environ["S3_DST_URL"]
        multipart_threshold = 5 * 1024**5 if "google" in os.environ[
            "S3_ENDPOINT_URL"] else 8 * 1024 * 1024
        tconfig = TransferConfig(multipart_threshold=multipart_threshold)
        bucket_name = urllib.parse.urlparse(dst_s3_url).netloc
        s3path = re.search(
            '^/*(.*)/*$', urllib.parse.urlparse(dst_s3_url).path).group(1)
        prefix = os.path.join(s3path, build_tag)
        # pylint: disable=no-member
        for s3_object in b3resource.Bucket(bucket_name).objects.filter(
                Prefix=f"{prefix}/"):
            path, _ = os.path.split(
                urllib.parse.unquote_plus(s3_object.key))
            lpath = re.sub(f'^{s3path}/*', '', path)
            if lpath and not os.path.exists(lpath):
                os.makedirs(lpath)
            Campaign.__logger.info(
                "Downloading %s",
                re.sub(f'^{s3path}/*', '',
                       urllib.parse.unquote_plus(s3_object.key)))
            # pylint: disable=no-member
            b3resource.Bucket(bucket_name).download_file(
                urllib.parse.unquote_plus(s3_object.key),
                re.sub(f'^{s3path}/*', '',
                       urllib.parse.unquote_plus(s3_object.key)),
                Config=tconfig)
        return Campaign.EX_OK
    except Exception:  # pylint: disable=broad-except
        Campaign.__logger.exception("Cannot publish the artifacts")
        return Campaign.EX_DUMP_ARTIFACTS_ERROR
def upload_file(frmt_list):
    s3_uploads = []
    asset_list = []
    data_set_name = os.environ['DATA_SET_NAME']
    data_dir = '/tmp'
    s3_bucket = os.environ['S3_BUCKET']
    if data_set_name is None:
        raise Exception('DATA_SET_NAME environment not set')
    if s3_bucket is None:
        raise Exception('S3_BUCKET environment not set')
    s3 = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    for frmt in frmt_list:
        obj_name = data_set_name + frmt
        file_location = os.path.join(data_dir, obj_name)
        new_s3_key = data_set_name + '/dataset/' + obj_name
        with open(file_location) as f:
            mystring = f.read()
        filedata = bytes(mystring, 'utf-8')
        has_changes = s3md5.md5_compare(s3, s3_bucket, new_s3_key, io.BytesIO(filedata))
        if has_changes:
            s3_resource.Object(s3_bucket, new_s3_key).put(Body=filedata)
            print('Uploaded: ' + file_location)
        else:
            print('No changes in: ' + file_location)
        asset_source = {'Bucket': s3_bucket, 'Key': new_s3_key}
        s3_uploads.append({
            'has_changes': has_changes,
            'asset_source': asset_source
        })
    count_updated_data = sum(upload['has_changes'] for upload in s3_uploads)
    if count_updated_data > 0:
        asset_list = list(map(lambda upload: upload['asset_source'], s3_uploads))
        if len(asset_list) == 0:
            raise Exception('Something went wrong when uploading files to s3')
    # asset_list is returned to be used in the lambda_handler function;
    # if it is empty, lambda_handler will not republish
    return asset_list
def multipart_upload_with_s3(self, bucket_name, file_path=None, object_name=None):
    # Multipart upload (see notes)
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    key_path = 'multipart_files/{}'.format(object_name)
    print(bucket_name, file_path, object_name, key_path)
    self.s3_client.upload_file(file_path, bucket_name, key_path,
                               ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/pdf'},
                               Config=config,
                               Callback=progressPercent.ProgressPercentage(file_path))
def multi_part_upload_with_s3(src_file, bucket_name, dst_file):
    # Multipart upload
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    s3 = boto3.resource('s3')
    cb = ProgressPercentage(src_file)
    s3.meta.client.upload_file(src_file, bucket_name, dst_file,
                               Config=config,
                               # Callback=cb,
                               )
def __init__(self, bucket):
    self.bucketName = bucket
    self.s3 = boto3.client('s3')
    self.s3Config = TransferConfig(multipart_threshold=1024 * 25,
                                   max_concurrency=10,
                                   multipart_chunksize=1024 * 25,
                                   use_threads=True)
    self.s3Transfer = S3Transfer(client=self.s3, config=self.s3Config)
    self.session = boto3.Session()
def __init__(self, *args, **kwargs) -> None:
    kwargs['client_type'] = 's3'
    self.extra_args = {}
    if 'extra_args' in kwargs:
        self.extra_args = kwargs['extra_args']
        if not isinstance(self.extra_args, dict):
            raise ValueError(f"extra_args '{self.extra_args!r}' must be of type {dict}")
        del kwargs['extra_args']
    self.transfer_config = TransferConfig()
    if 'transfer_config_args' in kwargs:
        transport_config_args = kwargs['transfer_config_args']
        if not isinstance(transport_config_args, dict):
            raise ValueError(f"transfer_config_args '{transport_config_args!r}' must be of type {dict}")
        self.transfer_config = TransferConfig(**transport_config_args)
        del kwargs['transfer_config_args']
    super().__init__(*args, **kwargs)
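# Hedged usage sketch, not part of the original source: "SomeS3Hook" stands in
# for whatever class defines the __init__ above. It shows how a
# transfer_config_args dict is expanded straight into boto3's TransferConfig;
# any key TransferConfig does not accept raises a TypeError at that point.
hook = SomeS3Hook(
    extra_args={"ServerSideEncryption": "AES256"},
    transfer_config_args={
        "multipart_threshold": 8 * 1024 * 1024,
        "max_concurrency": 4,
        "use_threads": True,
    },
)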
def _upload_obj(self, stream, key, org_size):
    parts = org_size // self.MULTI_PART_CHUNK_SIZE
    chunk_size = self.MULTI_PART_CHUNK_SIZE if parts < self.PARTS_LIMIT else org_size // self.PARTS_LIMIT
    t_config = TransferConfig(multipart_threshold=self.MULTI_PART_THRESHOLD,
                              multipart_chunksize=chunk_size)
    self.bucket._upload_file_obj(key, stream, t_config)
    self.files_copied += 1
    self.log.info(f'Copying file {key} SUCCEEDED!')
    return self.bucket.get_object_size(key)
def deleteImageInS3(s3Client, bucket, uploaded_image):
    # Note: delete_object does not take a transfer Config, so this TransferConfig is unused here.
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    try:
        s3Client.delete_object(Bucket=bucket, Key=uploaded_image)
    except ClientError as e:
        print(e)
def download_fileobj(self, Bucket, Key, Fileobj, ExtraArgs=None,
                     Callback=None, Config=None):
    """Download an object from S3 to a file-like object.

    The file-like object must be in binary mode.

    This is a managed transfer which will perform a multipart download in
    multiple threads if necessary.

    Usage::

        import boto3
        s3 = boto3.client('s3')

        with open('filename', 'wb') as data:
            s3.download_fileobj('mybucket', 'mykey', data)

    :type Fileobj: a file-like object
    :param Fileobj: A file-like object to download into. At a minimum, it must
        implement the `write` method and must accept bytes.

    :type Bucket: str
    :param Bucket: The name of the bucket to download from.

    :type Key: str
    :param Key: The name of the key to download from.

    :type ExtraArgs: dict
    :param ExtraArgs: Extra arguments that may be passed to the
        client operation.

    :type Callback: method
    :param Callback: A method which takes a number of bytes transferred to
        be periodically called during the download.

    :type Config: boto3.s3.transfer.TransferConfig
    :param Config: The transfer configuration to be used when performing the
        download.
    """
    if not hasattr(Fileobj, 'write'):
        raise ValueError('Fileobj must implement write')
    subscribers = None
    if Callback is not None:
        subscribers = [ProgressCallbackInvoker(Callback)]
    config = Config
    if config is None:
        config = TransferConfig()
    with create_transfer_manager(self, config) as manager:
        future = manager.download(
            bucket=Bucket, key=Key, fileobj=Fileobj,
            extra_args=ExtraArgs, subscribers=subscribers)
        return future.result()
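# Hedged example, not part of the original source: the Config parameter above
# accepts any boto3.s3.transfer.TransferConfig, so a caller can cap threading
# or chunk sizes for a single managed download. Bucket, key, and file names
# are illustrative.
import boto3
from boto3.s3.transfer import TransferConfig

s3 = boto3.client('s3')
single_threaded = TransferConfig(use_threads=False,
                                 multipart_threshold=16 * 1024 * 1024)
with open('large-object.bin', 'wb') as fh:
    s3.download_fileobj('mybucket', 'mykey', fh, Config=single_threaded)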
def stream_to_s3(self, video_id):
    """Stream the contents of the given URL to Amazon S3"""
    if not video_id:
        return False
    try:
        video = Video.objects.get(id=video_id)
    except (Video.DoesNotExist, Video.MultipleObjectsReturned):
        log.error("Exception retrieving video", video_id=video_id)
        raise
    video.update_status(VideoStatus.UPLOADING)
    task_id = self.get_task_id()
    try:
        response = requests.get(video.source_url, stream=True, timeout=60)
        response.raise_for_status()
    except requests.HTTPError:
        video.update_status(VideoStatus.UPLOAD_FAILED)
        self.update_state(task_id=task_id, state=states.FAILURE)
        raise
    _, content_type, content_length = parse_content_metadata(response)

    s3 = boto3.resource("s3")
    bucket_name = settings.VIDEO_S3_BUCKET
    bucket = s3.Bucket(bucket_name)
    total_bytes_uploaded = 0

    def callback(bytes_uploaded):
        """Callback function after upload"""
        nonlocal total_bytes_uploaded
        total_bytes_uploaded += bytes_uploaded
        data = {
            "uploaded": total_bytes_uploaded,
            "total": content_length,
        }
        self.update_state(task_id=task_id, state="PROGRESS", meta=data)

    config = TransferConfig(**settings.AWS_S3_UPLOAD_TRANSFER_CONFIG)
    try:
        bucket.upload_fileobj(
            Fileobj=response.raw,
            Key=video.get_s3_key(),
            ExtraArgs={"ContentType": content_type},
            Callback=callback,
            Config=config,
        )
    except Exception:
        video.update_status(VideoStatus.UPLOAD_FAILED)
        self.update_state(task_id=task_id, state=states.FAILURE)
        raise
def upload_multi_file(
    s3_client,
    file_obj,
    bucket,
    object_name=None,  # pylint: disable=E0012,C0330
    no_progress=False,
):  # noqa: D413
    """Upload a file to an S3 bucket.

    Args:
        s3_client: Boto s3 client.
        file_obj (MultiFileReader): File-like object with read() and
            __iter__ methods
        bucket (str): Bucket to upload to.
        object_name (str): S3 object name. If not specified then file_name
            is used
        no_progress (bool): Suppress the progress bar if True.

    Returns:
        True if file was uploaded, else False
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_obj.name

    # Upload the file
    try:
        # Set desired multipart threshold value of 5GB
        config = TransferConfig(
            multipart_threshold=CHUNK_SIZE,
            multipart_chunksize=CHUNK_SIZE,
            use_threads=True,
            max_concurrency=10,
        )
        if not no_progress:
            progress_bar = get_progress_bar(file_obj.get_size(), "Uploading: ")
            progress_bar.start()
        s3_client.upload_fileobj(
            file_obj,
            bucket,
            object_name,
            Config=config,
            Callback=_progress_bar_update(progress_bar) if not no_progress else None,
        )
        if not no_progress:
            progress_bar.finish()
    except ClientError as err:
        echo_info("Failed to upload file {}: {}".format(file_obj.name, err))
        return False
    return True
def _upload_func(self, s3_uri, func, archive):
    _, bucket, key_prefix = parse_s3(s3_uri)
    key = "%s/%s" % (key_prefix, func.name)
    transfer = S3Transfer(
        self.session_factory().client('s3'),
        config=TransferConfig(multipart_threshold=1024 * 1024 * 4))
    transfer.upload_file(archive.path, bucket=bucket, key=key,
                         extra_args={'ServerSideEncryption': 'AES256'})
    return bucket, key
def download_with_single_thread(bucket_name, object_key, download_file_path, file_size_mb):
    """
    Download a file from an Amazon S3 bucket to a local folder, using a
    single thread.
    """
    transfer_callback = TransferCallback(file_size_mb)
    config = TransferConfig(use_threads=False)
    s3.Bucket(bucket_name).Object(object_key).download_file(
        download_file_path, Config=config, Callback=transfer_callback)
    return transfer_callback.thread_info
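# Hedged usage sketch, not part of the original source: it assumes the
# module-level `s3 = boto3.resource('s3')` and the TransferCallback helper
# referenced above exist elsewhere in that example. Bucket, key, and path
# values are illustrative.
thread_info = download_with_single_thread(
    bucket_name='my-bucket',
    object_key='data/archive.tar.gz',
    download_file_path='/tmp/archive.tar.gz',
    file_size_mb=256,
)
print(thread_info)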
def multi_part_upload_with_s3():
    # Multipart upload
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    file_path = os.path.dirname(__file__) + '/largefile.pdf'
    key_path = 'multipart_files/largefile.pdf'
    s3.meta.client.upload_file(file_path, BUCKET_NAME, key_path,
                               ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/pdf'},
                               Config=config,
                               Callback=ProgressPercentage(file_path))
def __init__(self, target_dir, flatten, chunk_size, concurrency):
    # Get the S3 client and wrap it in an S3Transfer with the requested tuning
    client = boto3.client('s3')
    config = TransferConfig(multipart_threshold=chunk_size,
                            max_concurrency=concurrency)
    self.s3 = S3Transfer(client, config)

    # Set up instance variables
    self.target_dir = target_dir
    self.flatten = flatten
def _upload_file():
    # this nested function relies on variables from the enclosing scope
    # (kwargs, large, thrsh_, file_path, bucket_name, aws_name)
    client = get_s3_client(**kwargs)
    if large:
        config = TransferConfig(multipart_threshold=thrsh_,
                                max_concurrency=10,
                                multipart_chunksize=thrsh_,
                                use_threads=True)
        client.upload_file(file_path, bucket_name, aws_name, Config=config)
    else:
        client.upload_file(file_path, bucket_name, aws_name)
def __init__(self, case):
    super(TransferTest, self).__init__(case)
    self.s3 = get_env_s3_client()
    tc = TransferConfig(multipart_threshold=5 * 1024 * 1024,
                        max_concurrency=10,
                        multipart_chunksize=5 * 1024 * 1024,
                        num_download_attempts=5,
                        max_io_queue=100,
                        io_chunksize=262144,
                        use_threads=True)
    self.tm = TransferManager(self.s3, tc)
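# Hedged usage sketch, not part of the original source: once constructed as
# above, s3transfer's TransferManager returns futures, and result() blocks
# until the managed (possibly multipart) transfer completes or raises. The
# file, bucket, and key names here are illustrative.
future = self.tm.upload('/tmp/big-file.bin', 'test-bucket', 'big-file.bin')
future.result()  # raises if the upload failed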
def upload_all(self, d):
    # Make sure the directory path ends with a trailing slash so that the
    # paths we create by removing it as a prefix never start with a '/'.
    if not d.endswith('/'):
        d = d + "/"
    transfer_config = TransferConfig(**self.upload_config)
    for dirpath, dirs, files in walk(d):
        if dirpath.startswith(d):
            suffix = dirpath[len(d):]
            self._upload_files(dirpath, suffix, files, transfer_config)
def __init__(self):
    """Init and run main loop"""
    self.uploadsPaused_ = False
    self.uploadQueue_ = deque()

    # Get alternate endpoint for base station video offload
    endpoint = rospy.get_param('~s3_endpoint')
    if "" == endpoint:
        endpoint = None
    else:
        rospy.loginfo("Using alternate S3 endpoint {}".format(endpoint))
    s3 = boto3.resource(service_name='s3', endpoint_url=endpoint)
    upload_config = TransferConfig(multipart_threshold=MULTIPART_THRESHOLD)
    client = s3.meta.client

    # Pubs, Subs & Srvs
    rospy.Subscriber("s3ros/uploadLocalFile", Upload, self.localUploadCB)
    rospy.Subscriber("s3ros/pauseUploads", Bool, self.pauseUploadsCB)

    # Main loop monitors upload queue and uploads when necessary
    r = rospy.Rate(20)
    while not rospy.is_shutdown():
        if len(self.uploadQueue_) > 0:
            toUpload = self.uploadQueue_.popleft()
            if not os.path.isfile(toUpload[0]):
                rospy.logwarn("{} is not a file".format(toUpload[0]))
                continue
            try:
                rospy.loginfo("Attempting to upload {} to {}/{}/{}".format(
                    toUpload[0], endpoint, toUpload[1], toUpload[2]))
                rsp = client.upload_file(toUpload[0], toUpload[1], toUpload[2],
                                         Config=upload_config)
                rospy.loginfo("Upload succeeded")
            except boto3.exceptions.S3UploadFailedError as e:
                rospy.logerr("Could not upload {0} to bucket {1}/{2}".format(*toUpload))
                rospy.logerr(e)
            except Exception as e:
                rospy.logerr(e)
        r.sleep()
    if len(self.uploadQueue_) > 0:
        rospy.logwarn("Upload queue not empty")
def test_io_thread_fails_to_open_triggers_shutdown_error(self):
    client = mock.Mock()
    client.get_object.return_value = {'Body': six.BytesIO(b'asdf')}
    os_layer = mock.Mock(spec=OSUtils)
    os_layer.open.side_effect = IOError("Can't open file")
    downloader = MultipartDownloader(client, TransferConfig(),
                                     os_layer, SequentialExecutor)
    # We're verifying that the exception raised from the IO future
    # propagates back up via download_file().
    with self.assertRaisesRegexp(IOError, "Can't open file"):
        downloader.download_file('bucket', 'key', 'filename',
                                 len(b'asdf'), {})
def test_assume_role_to_write_multipart(self):
    client = self._assume_role_session_client_with_write_access(Bucket, "key")
    from boto3.s3.transfer import TransferConfig
    data = b"1234567" * 1024 * 1024  # 7 MB => one 5 MB part + one 2 MB part
    client.upload_fileobj(
        io.BytesIO(data),
        Bucket,
        "key",
        Config=TransferConfig(multipart_threshold=5 * 1024 * 1024),
    )
    self.assertEqual(minio.get_object_with_data(Bucket, "key")["Body"], data)
def __init__(self, endpoint_url, bucket):
    s3_pool_size = 150
    boto_config = botocore.config.Config(max_pool_connections=s3_pool_size)
    self._s3r = boto3.resource('s3', endpoint_url=endpoint_url, config=boto_config)
    self._s3c = self._s3r.meta.client
    self._bucket = bucket
    self._s3b = self._s3r.Bucket(self._bucket)
    self._s3b.create()
    self._tx_config = TransferConfig(max_concurrency=s3_pool_size,
                                     multipart_threshold=2**20,
                                     multipart_chunksize=2**20)
    self._progress_interval = 120
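# Hedged usage sketch, not part of the original source: with max_concurrency
# matched to the connection pool size as above, a single large upload through
# the cached transfer config can use the whole pool. The method name, local
# path, and key are illustrative; Bucket.upload_file accepts a Config kwarg.
def upload_blob(self, local_path, key):
    self._s3b.upload_file(local_path, key, Config=self._tx_config)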