Esempio n. 1
0
 def make_emr_client(self):
     '''Build a boto3 EMR client from this object's settings and wrap it.

     The raw client is created with ``self.aws_access_key_id``,
     ``self.aws_secret_access_key`` and ``self.region``, and is then handed to
     ``_wrap_aws_client`` with ``min_backoff=self.check_cluster_every``.

     Returns:
         Whatever ``_wrap_aws_client`` returns for the raw EMR client.
     '''
     connection_kwargs = dict(
         aws_access_key_id=self.aws_access_key_id,
         aws_secret_access_key=self.aws_secret_access_key,
         region_name=self.region,
     )
     emr = boto3.client('emr', **connection_kwargs)
     return _wrap_aws_client(emr, min_backoff=self.check_cluster_every)
Esempio n. 2
0
    def wait_for_log(self,
                     context,
                     log_bucket,
                     log_key,
                     waiter_delay=30,
                     waiter_max_attempts=20):
        '''Wait for a gzipped EMR log to appear on S3 and return its decompressed text.

        Note that EMR syncs logs to S3 every 5 minutes, so this may take a long time.

        Args:
            context (SystemPipelineExecutionContext): context, for logging
            log_bucket (str): S3 bucket where log is expected to appear
            log_key (str): S3 key for the log file
            waiter_delay (int): How long to wait between attempts to check S3 for the log file
            waiter_max_attempts (int): Number of attempts before giving up on waiting

        Raises:
            EmrError: Raised if we waited the full duration and the logs did not appear

        Returns:
            str: contents of the log file, gunzipped and decoded as UTF-8
        '''
        check.str_param(log_bucket, 'log_bucket')
        check.str_param(log_key, 'log_key')
        check.int_param(waiter_delay, 'waiter_delay')
        check.int_param(waiter_max_attempts, 'waiter_max_attempts')

        context.log.info(
            'Attempting to get log: s3://{log_bucket}/{log_key}'.format(
                log_bucket=log_bucket, log_key=log_key))

        # NOTE(review): the S3 client is created without explicit credentials or
        # region, so boto3's default resolution applies -- confirm this is intended.
        s3 = _wrap_aws_client(boto3.client('s3'),
                              min_backoff=self.check_cluster_every)
        waiter = s3.get_waiter('object_exists')
        try:
            waiter.wait(
                Bucket=log_bucket,
                Key=log_key,
                WaiterConfig={
                    'Delay': waiter_delay,
                    'MaxAttempts': waiter_max_attempts
                },
            )
        except WaiterError as err:
            # Surface waiter exhaustion as the module's own error type while
            # keeping the original WaiterError attached as the cause.
            six.raise_from(
                EmrError('EMR log file did not appear on S3 after waiting'),
                err,
            )

        # Pull the whole object into memory so gzip can read from a seekable buffer.
        compressed = BytesIO(
            s3.get_object(Bucket=log_bucket, Key=log_key)['Body'].read())
        # Use a context manager so the GzipFile is closed deterministically.
        with gzip.GzipFile(fileobj=compressed) as gzip_file:
            return gzip_file.read().decode('utf-8')
Esempio n. 3
0
File: emr.py Progetto: sd2k/dagster
    def make_emr_client(self):
        """Build a boto3 EMR client from this object's settings and wrap it.

        The raw client is created with ``self.aws_access_key_id``,
        ``self.aws_secret_access_key`` and ``self.region``, and is then handed to
        ``_wrap_aws_client`` with ``min_backoff=self.check_cluster_every``.

        Returns:
            The wrapped EMR client produced by ``_wrap_aws_client``.
        """
        connection_kwargs = dict(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region,
        )
        emr = boto3.client("emr", **connection_kwargs)
        return _wrap_aws_client(emr, min_backoff=self.check_cluster_every)
Esempio n. 4
0
    def wait_for_log(self,
                     log,
                     log_bucket,
                     log_key,
                     waiter_delay=30,
                     waiter_max_attempts=20):
        """Wait for a gzipped EMR log to appear on S3 and return its decompressed text.

        Note that EMR syncs logs to S3 every 5 minutes, so this may take a long time.

        Args:
            log: Logger-like object; only its ``info`` method is called here
            log_bucket (str): S3 bucket where log is expected to appear
            log_key (str): S3 key for the log file
            waiter_delay (int): How long to wait between attempts to check S3 for the log file
            waiter_max_attempts (int): Number of attempts before giving up on waiting

        Raises:
            EmrError: Raised if we waited the full duration and the logs did not appear

        Returns:
            str: contents of the log file, gunzipped and decoded as UTF-8
        """
        check.str_param(log_bucket, "log_bucket")
        check.str_param(log_key, "log_key")
        check.int_param(waiter_delay, "waiter_delay")
        check.int_param(waiter_max_attempts, "waiter_max_attempts")

        log.info("Attempting to get log: s3://{log_bucket}/{log_key}".format(
            log_bucket=log_bucket, log_key=log_key))

        # NOTE(review): the S3 client is created without explicit credentials or
        # region, so boto3's default resolution applies -- confirm this is intended.
        s3 = _wrap_aws_client(boto3.client("s3"),
                              min_backoff=self.check_cluster_every)
        waiter = s3.get_waiter("object_exists")
        try:
            waiter.wait(
                Bucket=log_bucket,
                Key=log_key,
                WaiterConfig={
                    "Delay": waiter_delay,
                    "MaxAttempts": waiter_max_attempts
                },
            )
        except WaiterError as err:
            # Surface waiter exhaustion as the module's own error type while
            # keeping the original WaiterError attached as the cause.
            six.raise_from(
                EmrError("EMR log file did not appear on S3 after waiting"),
                err,
            )

        # Pull the whole object into memory so gzip can read from a seekable buffer.
        compressed = BytesIO(
            s3.get_object(Bucket=log_bucket, Key=log_key)["Body"].read())
        # Use a context manager so the GzipFile is closed deterministically.
        with gzip.GzipFile(fileobj=compressed) as gzip_file:
            return gzip_file.read().decode("utf-8")
Esempio n. 5
0
def test_wrap_aws_client():
    '''Check that a wrapped S3 client still answers ``list_buckets``.

    Expects an HTTP 200 response and an empty bucket listing.
    '''
    wrapped = _wrap_aws_client(boto3.client('s3'), min_backoff=1000)
    response = wrapped.list_buckets()
    status_code = response['ResponseMetadata']['HTTPStatusCode']
    assert status_code == 200
    buckets = response['Buckets']
    assert buckets == []
Esempio n. 6
0
def test_wrap_aws_client(mock_s3_resource):
    """Check that wrapping the fixture's S3 client keeps ``list_buckets`` working.

    Expects an HTTP 200 response and an empty bucket listing.
    """
    s3_client = mock_s3_resource.meta.client
    wrapped = _wrap_aws_client(s3_client, min_backoff=1000)
    response = wrapped.list_buckets()
    status_code = response["ResponseMetadata"]["HTTPStatusCode"]
    assert status_code == 200
    buckets = response["Buckets"]
    assert buckets == []
Esempio n. 7
0
def test_wrap_aws_client():
    """Check that a wrapped S3 client still answers ``list_buckets``.

    Expects an HTTP 200 response and an empty bucket listing.
    """
    wrapped = _wrap_aws_client(boto3.client("s3"), min_backoff=1000)
    response = wrapped.list_buckets()
    status_code = response["ResponseMetadata"]["HTTPStatusCode"]
    assert status_code == 200
    buckets = response["Buckets"]
    assert buckets == []