def make_emr_client(self):
    '''Build an EMR client wrapped with retry/backoff behavior.

    Returns:
        The boto3 EMR client, wrapped by ``_wrap_aws_client`` so transient
        AWS failures are retried with a minimum backoff of
        ``self.check_cluster_every`` seconds.
    '''
    emr = boto3.client(
        'emr',
        region_name=self.region,
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key,
    )
    return _wrap_aws_client(emr, min_backoff=self.check_cluster_every)
def wait_for_log(self, context, log_bucket, log_key, waiter_delay=30, waiter_max_attempts=20):
    '''Wait for gzipped EMR logs to appear on S3 and return their contents.

    EMR only syncs logs to S3 roughly every five minutes, so even a
    successful wait can take a long time.

    Args:
        context (SystemPipelineExecutionContext): context, for logging
        log_bucket (str): S3 bucket where log is expected to appear
        log_key (str): S3 key for the log file
        waiter_delay (int): Seconds to sleep between S3 polls
        waiter_max_attempts (int): Number of polls before giving up

    Raises:
        EmrError: Raised if we waited the full duration and the logs did not appear

    Returns:
        str: contents of the log file
    '''
    check.str_param(log_bucket, 'log_bucket')
    check.str_param(log_key, 'log_key')
    check.int_param(waiter_delay, 'waiter_delay')
    check.int_param(waiter_max_attempts, 'waiter_max_attempts')

    message = 'Attempting to get log: s3://{log_bucket}/{log_key}'.format(
        log_bucket=log_bucket, log_key=log_key
    )
    context.log.info(message)

    # Retry-wrapped S3 client, same backoff policy as the EMR client.
    s3_client = _wrap_aws_client(boto3.client('s3'), min_backoff=self.check_cluster_every)
    try:
        s3_client.get_waiter('object_exists').wait(
            Bucket=log_bucket,
            Key=log_key,
            WaiterConfig={'Delay': waiter_delay, 'MaxAttempts': waiter_max_attempts},
        )
    except WaiterError as err:
        # Chain the waiter failure onto a domain-specific error.
        six.raise_from(
            EmrError('EMR log file did not appear on S3 after waiting'), err
        )

    # EMR writes logs gzip-compressed; decompress in memory and decode.
    compressed = s3_client.get_object(Bucket=log_bucket, Key=log_key)['Body'].read()
    return gzip.GzipFile(fileobj=BytesIO(compressed)).read().decode('utf-8')
def make_emr_client(self):
    """Build an EMR client wrapped with retry/backoff behavior.

    Construction is wrapped in retries in case client connection fails
    transiently.

    Returns:
        botocore.client.EMR: An EMR client
    """
    emr = boto3.client(
        "emr",
        region_name=self.region,
        aws_access_key_id=self.aws_access_key_id,
        aws_secret_access_key=self.aws_secret_access_key,
    )
    return _wrap_aws_client(emr, min_backoff=self.check_cluster_every)
def wait_for_log(self, log, log_bucket, log_key, waiter_delay=30, waiter_max_attempts=20):
    """Wait for gzipped EMR logs to appear on S3 and return their contents.

    EMR only syncs logs to S3 roughly every five minutes, so even a
    successful wait can take a long time.

    Args:
        log_bucket (str): S3 bucket where log is expected to appear
        log_key (str): S3 key for the log file
        waiter_delay (int): Seconds to sleep between S3 polls
        waiter_max_attempts (int): Number of polls before giving up

    Raises:
        EmrError: Raised if we waited the full duration and the logs did not appear

    Returns:
        str: contents of the log file
    """
    check.str_param(log_bucket, "log_bucket")
    check.str_param(log_key, "log_key")
    check.int_param(waiter_delay, "waiter_delay")
    check.int_param(waiter_max_attempts, "waiter_max_attempts")

    message = "Attempting to get log: s3://{log_bucket}/{log_key}".format(
        log_bucket=log_bucket, log_key=log_key
    )
    log.info(message)

    # Retry-wrapped S3 client, same backoff policy as the EMR client.
    s3_client = _wrap_aws_client(boto3.client("s3"), min_backoff=self.check_cluster_every)
    try:
        s3_client.get_waiter("object_exists").wait(
            Bucket=log_bucket,
            Key=log_key,
            WaiterConfig={"Delay": waiter_delay, "MaxAttempts": waiter_max_attempts},
        )
    except WaiterError as err:
        # Chain the waiter failure onto a domain-specific error.
        six.raise_from(
            EmrError("EMR log file did not appear on S3 after waiting"), err
        )

    # EMR writes logs gzip-compressed; decompress in memory and decode.
    compressed = s3_client.get_object(Bucket=log_bucket, Key=log_key)["Body"].read()
    return gzip.GzipFile(fileobj=BytesIO(compressed)).read().decode("utf-8")
def test_wrap_aws_client():
    # A wrapped client should behave like the underlying boto3 client:
    # listing buckets succeeds and (in a fresh account/mock) is empty.
    wrapped = _wrap_aws_client(boto3.client('s3'), min_backoff=1000)
    response = wrapped.list_buckets()
    assert response['Buckets'] == []
    assert response['ResponseMetadata']['HTTPStatusCode'] == 200
def test_wrap_aws_client(mock_s3_resource):
    # A wrapped client should behave like the underlying (mocked) boto3
    # client: listing buckets succeeds and the mock account is empty.
    wrapped = _wrap_aws_client(mock_s3_resource.meta.client, min_backoff=1000)
    response = wrapped.list_buckets()
    assert response["Buckets"] == []
    assert response["ResponseMetadata"]["HTTPStatusCode"] == 200
def test_wrap_aws_client():
    # A wrapped client should behave like the underlying boto3 client:
    # listing buckets succeeds and (in a fresh account/mock) is empty.
    wrapped = _wrap_aws_client(boto3.client("s3"), min_backoff=1000)
    response = wrapped.list_buckets()
    assert response["Buckets"] == []
    assert response["ResponseMetadata"]["HTTPStatusCode"] == 200