def test_token_error(self, mock_client, logger):
    mock_client.return_value.send_message.side_effect = ClientError(
        {"Error": {"Code": "Hello", "Message": "hello"}}, "SendMessage"
    )
    with self.assertRaises(ClientError):
        self.run_test()
    assert len(logger.info.call_args_list) == 0

    mock_client.return_value.send_message.side_effect = ClientError(
        {"Error": {"Code": "AccessDenied", "Message": "Hello"}}, "SendMessage"
    )
    self.run_test()
    assert len(logger.info.call_args_list) == 1
    assert (
        logger.info.call_args_list[0][0][0]
        == "sentry_plugins.amazon_sqs.access_token_invalid"
    )
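# The snippets in this section hand-construct botocore's ClientError. For
# reference, a minimal sketch of the two-argument form botocore expects and
# how handling code typically inspects it (the operation name "SendMessage"
# is just an example):
from botocore.exceptions import ClientError

error = ClientError(
    {"Error": {"Code": "AccessDenied", "Message": "not allowed"}},
    "SendMessage",
)
try:
    raise error
except ClientError as e:
    # Handlers usually branch on the error code carried in e.response.
    assert e.response["Error"]["Code"] == "AccessDenied"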
def test_invalid_s3_bucket(self, mock_client, logger):
    self.plugin.set_option("s3_bucket", "bad_bucket", self.project)
    mock_client.return_value.put_object.side_effect = ClientError(
        {"Error": {"Code": "NoSuchBucket"}},
        "PutObject",
    )
    self.run_test()
    assert len(logger.info.call_args_list) == 2
    assert (
        logger.info.call_args_list[1][0][0]
        == "sentry_plugins.amazon_sqs.s3_bucket_invalid"
    )
def test_s3_custom_policy(self):
    s3 = self.subscribe.s3
    s3.head_bucket.side_effect = ClientError(
        {'Error': {'Code': '404', 'Message': ''}}, 'HeadBucket')
    self.subscribe.setup_new_bucket('test', 'logs', custom_policy='{}')
    s3.get_object.assert_not_called()
    s3.put_bucket_policy.assert_called_with(Bucket='test', Policy='{}')
def test_s3_uses_regionalized_policy(self):
    s3 = self.subscribe.s3
    s3.head_bucket.side_effect = ClientError(
        {'Error': {'Code': '404', 'Message': ''}}, 'HeadBucket')
    self.subscribe.setup_new_bucket('test', 'logs')
    s3.get_object.assert_called_with(
        Bucket='awscloudtrail-policy-us-east-1', Key=ANY)
def _make_api_call(self, operation_name, operation_kwargs):
    """
    This private method is here for two reasons:
    1. It's faster to avoid using botocore's response parsing
    2. It provides a place to monkey patch requests for unit testing
    """
    operation_model = self.client._service_model.operation_model(
        operation_name)
    request_dict = self.client._convert_to_request_dict(
        operation_kwargs, operation_model)
    prepared_request = self.client._endpoint.create_request(
        request_dict, operation_model)
    response = self.requests_session.send(prepared_request)
    if response.status_code >= 300:
        data = response.json()
        botocore_expected_format = {
            "Error": {
                "Message": data.get("message", ""),
                "Code": data.get("__type", "")
            }
        }
        raise ClientError(botocore_expected_format, operation_name)
    data = response.json()

    # Simulate botocore's binary attribute handling
    if ITEM in data:
        for attr in six.itervalues(data[ITEM]):
            _convert_binary(attr)
    if ITEMS in data:
        for item in data[ITEMS]:
            for attr in six.itervalues(item):
                _convert_binary(attr)
    if RESPONSES in data:
        for item_list in six.itervalues(data[RESPONSES]):
            for item in item_list:
                for attr in six.itervalues(item):
                    _convert_binary(attr)
    if LAST_EVALUATED_KEY in data:
        for attr in six.itervalues(data[LAST_EVALUATED_KEY]):
            _convert_binary(attr)
    if UNPROCESSED_KEYS in data:
        for item_list in six.itervalues(data[UNPROCESSED_KEYS]):
            for item in item_list:
                for attr in six.itervalues(item):
                    _convert_binary(attr)
    if UNPROCESSED_ITEMS in data:
        for item_mapping in six.itervalues(data[UNPROCESSED_ITEMS]):
            for item in six.itervalues(item_mapping):
                for attr in six.itervalues(item):
                    _convert_binary(attr)
    return data
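# The docstring's second point (a seam for monkey patching) might be used
# like this in a test. This is only a sketch: MyConnection is a hypothetical
# stand-in for whatever class defines _make_api_call, and the payload is
# made up.
from unittest import mock

fake_response = mock.Mock()
fake_response.status_code = 200
fake_response.json.return_value = {"Item": {}}

connection = MyConnection()  # hypothetical owner of _make_api_call
connection.requests_session = mock.Mock()
connection.requests_session.send.return_value = fake_response

assert connection._make_api_call("GetItem", {"TableName": "t"}) == {"Item": {}}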
def test_s3_create_non_us_east_1(self):
    # Because this is outside of us-east-1, it should create
    # a bucket configuration with a location constraint.
    s3 = self.subscribe.s3
    self.subscribe.region_name = 'us-west-2'
    s3.head_bucket.side_effect = ClientError(
        {'Error': {'Code': '404', 'Message': ''}}, 'HeadBucket')
    self.subscribe.setup_new_bucket('test', 'logs')
    args, kwargs = s3.create_bucket.call_args
    self.assertIn('CreateBucketConfiguration', kwargs)
    bucket_config = kwargs['CreateBucketConfiguration']
    self.assertEqual(bucket_config['LocationConstraint'], 'us-west-2')
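# Context for the behavior under test: S3's create_bucket rejects an explicit
# LocationConstraint of us-east-1 (the default location), so callers attach
# CreateBucketConfiguration only for other regions. A minimal boto3 sketch;
# the bucket name is illustrative:
import boto3

region = 'us-west-2'
s3 = boto3.client('s3', region_name=region)

kwargs = {'Bucket': 'example-bucket'}
if region != 'us-east-1':
    kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}
s3.create_bucket(**kwargs)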
def fake_describe_spot_instance_requests(*args, **kwargs):
    # this_call, orig_describe_instance, and r (an EC2 resource) come from
    # the enclosing scope.
    curr_call = this_call[0]
    this_call[0] += 1
    if curr_call == 0:
        # First call: the spot request is not visible yet.
        raise ClientError(
            {'Error': {'Code': 'InvalidSpotInstanceRequestID.NotFound'}},
            'DescribeSpotInstanceRequests')
    if curr_call == 1:
        # Second call: pass the real response through unchanged.
        return orig_describe_instance(*args, **kwargs)
    # Later calls: report the request as fulfilled by a running instance.
    response = orig_describe_instance(*args, **kwargs)
    instances = r.instances.filter(
        Filters=[{'Name': 'instance-state-name', 'Values': ['running']}])
    response['SpotInstanceRequests'][0]['Status']['Code'] = 'fulfilled'
    response['SpotInstanceRequests'][0]['InstanceId'] = list(instances)[0].id
    return response
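# One way the fake above might be wired in. This is an assumption, not from
# the snippet: ec2_client is a hypothetical boto3 EC2 client, and this_call /
# orig_describe_instance are the closure variables the fake reads.
this_call = [0]
orig_describe_instance = ec2_client.describe_spot_instance_requests
ec2_client.describe_spot_instance_requests = fake_describe_spot_instance_requests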
def test_message_group_error(self, mock_client, logger):
    mock_client.return_value.send_message.side_effect = ClientError(
        {
            "Error": {
                "Code": "MissingParameter",
                "Message": "The request must contain the parameter MessageGroupId.",
            }
        },
        "SendMessage",
    )
    self.run_test()
    assert len(logger.info.call_args_list) == 1
    assert (
        logger.info.call_args_list[0][0][0]
        == "sentry_plugins.amazon_sqs.missing_message_group_id"
    )
def test_s3_create(self):
    iam = self.subscribe.iam
    s3 = self.subscribe.s3
    s3.head_bucket.side_effect = ClientError(
        {'Error': {'Code': '404', 'Message': ''}}, 'HeadBucket')
    self.subscribe.setup_new_bucket('test', 'logs')
    iam.get_user.assert_called()
    s3.get_object.assert_called()
    s3.create_bucket.assert_called()
    s3.put_bucket_policy.assert_called()
    s3.delete_bucket.assert_not_called()
    args, kwargs = s3.create_bucket.call_args
    # boto3 kwargs are PascalCase; checking the lowercase key would pass
    # vacuously.
    self.assertNotIn('CreateBucketConfiguration', kwargs)
def test_s3_create(self):
    iam = self.subscribe.iam
    s3 = self.subscribe.s3
    s3.head_bucket.side_effect = ClientError(
        {'Error': {'Code': '404', 'Message': ''}}, 'HeadBucket')
    self.subscribe.setup_new_bucket('test', 'logs')
    iam.get_user.assert_called_with()
    s3.get_object.assert_called_with(
        Bucket='awscloudtrail-policy-us-east-1',
        Key='policy/S3/AWSCloudTrail-S3BucketPolicy-2014-12-17.json',
    )
    s3.create_bucket.assert_called_with(Bucket='test')
    s3.put_bucket_policy.assert_called_with(
        Bucket='test', Policy=u'{"Statement": []}')
    self.assertFalse(s3.delete_bucket.called)
    args, kwargs = s3.create_bucket.call_args
    # boto3 kwargs are PascalCase; checking the lowercase key would pass
    # vacuously.
    self.assertNotIn('CreateBucketConfiguration', kwargs)
def test_policy_from_paramfile(self, create_client_mock):
    client = Mock()
    # S3 mock calls
    client.get_user.return_value = {'User': {'Arn': ':::::'}}
    client.head_bucket.side_effect = ClientError(
        {'Error': {'Code': 404, 'Message': ''}}, 'HeadBucket')
    # CloudTrail mock call
    client.describe_trails.return_value = {}
    create_client_mock.return_value = client
    policy = '{"Statement": []}'
    with temporary_file('w') as f:
        f.write(policy)
        f.flush()
        command = (
            'cloudtrail create-subscription --s3-new-bucket foo '
            '--name bar --s3-custom-policy file://{0}'.format(f.name))
        self.run_cmd(command, expected_rc=0)
    # Ensure that the *contents* of the file are sent as the policy
    # parameter to S3.
    client.put_bucket_policy.assert_called_with(
        Bucket='foo', Policy=policy)
# Imports added for completeness (not in the original excerpt); the entity
# classes are the ones shipped with the AWS Glue blueprint libraries.
# validate_params and generate_schedule are helpers defined elsewhere in the
# same blueprint module.
import boto3
from botocore.exceptions import ClientError
from awsglue.blueprint.workflow import Workflow, Entities
from awsglue.blueprint.job import Job
from awsglue.blueprint.crawler import Crawler


def generate_layout(user_params, system_params):
    file_name = "compaction_{0}_{1}.py".format(
        user_params['SourceDatabaseName'], user_params['SourceTableName'])
    session = boto3.Session(region_name=system_params['region'])
    glue = session.client('glue')
    s3_client = session.client('s3')
    workflow_name = user_params['WorkflowName']

    # Validate params
    validate_params(user_params, system_params)

    # Create the source database if it does not exist
    try:
        glue.create_database(
            DatabaseInput={'Name': user_params['SourceDatabaseName']})
        print("New database is created.")
    except glue.exceptions.AlreadyExistsException:
        print("Existing database is used.")

    location = {'LocationConstraint': system_params['region']}

    # Create the script bucket if it does not exist
    the_script_bucket = f"aws-glue-scripts-{system_params['accountId']}-{system_params['region']}"
    try:
        s3_client.head_bucket(Bucket=the_script_bucket)
        print("Script bucket already exists: ", the_script_bucket)
    except ClientError as ce:
        print(ce)
        print(ce.response['ResponseMetadata'])
        print("Creating script bucket: ", the_script_bucket)
        bucket = s3_client.create_bucket(
            Bucket=the_script_bucket, CreateBucketConfiguration=location)

    # Create the temp bucket if it does not exist
    the_temp_bucket = f"aws-glue-temporary-{system_params['accountId']}-{system_params['region']}"
    the_temp_prefix = f"{workflow_name}/"
    the_temp_location = f"s3://{the_temp_bucket}/{the_temp_prefix}"
    try:
        s3_client.head_bucket(Bucket=the_temp_bucket)
        print("Temp bucket already exists: ", the_temp_bucket)
    except ClientError as ce:
        print(ce)
        print(ce.response['ResponseMetadata'])
        print("Creating temp bucket: ", the_temp_bucket)
        bucket = s3_client.create_bucket(
            Bucket=the_temp_bucket, CreateBucketConfiguration=location)

    # Create the manifest bucket if it does not exist
    if user_params['EnableManifest']:
        the_manifest_bucket = f"aws-glue-blueprint-compaction-manifest-{system_params['accountId']}-{system_params['region']}"
        the_manifest_prefix = f"{workflow_name}/"
        the_manifest_location = f"s3://{the_manifest_bucket}/{the_manifest_prefix}"
        try:
            s3_client.head_bucket(Bucket=the_manifest_bucket)
            print("Manifest bucket already exists: ", the_manifest_bucket)
        except ClientError as ce:
            print(ce)
            print(ce.response['ResponseMetadata'])
            print("Creating Manifest bucket: ", the_manifest_bucket)
            bucket = s3_client.create_bucket(
                Bucket=the_manifest_bucket,
                CreateBucketConfiguration=location)

    # Upload the job script to the script bucket
    the_script_key = f"{workflow_name}/{file_name}"
    the_script_location = f"s3://{the_script_bucket}/{the_script_key}"
    with open("compaction/compaction.py", "rb") as f:
        s3_client.upload_fileobj(f, the_script_bucket, the_script_key)

    jobs = []
    crawlers = []

    command = {
        "Name": "glueetl",
        "ScriptLocation": the_script_location,
        "PythonVersion": "3"
    }
    arguments = {
        "--region": system_params['region'],
        "--TempDir": the_temp_location,
        "--job-bookmark-option": "job-bookmark-disable",
        "--job-language": "python",
        "--enable-s3-parquet-optimized-committer": "",
        "--enable-rename-algorithm-v2": "",
        "--enable-metrics": "",
        "--enable-continuous-cloudwatch-log": "true",
        "--enable_size_control": user_params['EnableSizeControl'],
        "--input_database": user_params['SourceDatabaseName'],
        "--input_table": user_params['SourceTableName'],
        "--input_format": user_params['InputDataFormat'],
        "--output_path": user_params['OutputDataLocation'],
        "--desired_size_mb": user_params['DesiredFileSizeMB'],
        "--enable_manifest": user_params['EnableManifest']
    }
    if user_params['InputDataFormatOptions']:
        arguments["--input_format_options"] = user_params[
            'InputDataFormatOptions']
    if user_params['EnableManifest']:
        arguments["--manifest_path"] = the_manifest_location

    crawler_source = None
    try:
        # Get the source table definition and validate the parameters with it.
        src_table = glue.get_table(
            DatabaseName=user_params['SourceDatabaseName'],
            Name=user_params['SourceTableName'])
        if src_table['Table']['StorageDescriptor']['Location'] == user_params[
                'OutputDataLocation']:
            err_msg = 'Location on the source table is same as OutputDataLocation.'
            raise ClientError(
                {
                    "Error": {
                        "Code": "InvalidInputException",
                        "Message": err_msg
                    }
                }, 'validate_params')
        if user_params['InputDataLocation'] and user_params['InputDataLocation'] != "" \
                and src_table['Table']['StorageDescriptor']['Location'] != user_params['InputDataLocation']:
            err_msg = 'Location on the source table is different from InputDataLocation.'
            raise ClientError(
                {
                    "Error": {
                        "Code": "InvalidInputException",
                        "Message": err_msg
                    }
                }, 'validate_params')
        print("Existing table is used.")
    except glue.exceptions.EntityNotFoundException:
        if user_params['InputDataLocation'] and user_params[
                'InputDataLocation'] != "":
            # Create a new source table if it does not exist
            glue.create_table(
                DatabaseName=user_params['SourceDatabaseName'],
                TableInput={
                    'Name': user_params['SourceTableName'],
                    'StorageDescriptor': {
                        'Location': user_params['InputDataLocation']
                    }
                })
            print("New table is created.")
        else:
            err_msg = 'Source table does not exist, and input data location is not provided.'
            raise ClientError(
                {
                    "Error": {
                        "Code": "InvalidInputException",
                        "Message": err_msg
                    }
                }, 'validate_params')

    if user_params['InputDataLocation'] and user_params['InputDataLocation'] != "":
        targets_source = {
            "CatalogTargets": [{
                "DatabaseName": user_params['SourceDatabaseName'],
                "Tables": [user_params['SourceTableName']]
            }]
        }
        crawler_source = Crawler(
            Name="{}_crawler_source".format(workflow_name),
            Role=user_params['IAMRole'],
            Grouping={"TableGroupingPolicy": "CombineCompatibleSchemas"},
            Targets=targets_source,
            SchemaChangePolicy={"DeleteBehavior": "LOG"},
        )
        crawlers.append(crawler_source)

    if crawler_source:
        transform_job = Job(
            Name="{0}_compaction_{1}_{2}".format(
                workflow_name, user_params['SourceDatabaseName'],
                user_params['SourceTableName']),
            Command=command,
            Role=user_params['IAMRole'],
            DefaultArguments=arguments,
            WorkerType="G.1X",
            NumberOfWorkers=user_params['NumberOfWorkers'],
            GlueVersion="2.0",
            DependsOn={crawler_source: "SUCCEEDED"})
    else:
        transform_job = Job(
            Name="{0}_compaction_{1}_{2}".format(
                workflow_name, user_params['SourceDatabaseName'],
                user_params['SourceTableName']),
            Command=command,
            Role=user_params['IAMRole'],
            DefaultArguments=arguments,
            WorkerType="G.1X",
            NumberOfWorkers=user_params['NumberOfWorkers'],
            GlueVersion="2.0")
    jobs.append(transform_job)

    # Create the destination database if it does not exist
    try:
        glue.create_database(
            DatabaseInput={'Name': user_params['DestinationDatabaseName']})
        print("New database is created.")
    except glue.exceptions.AlreadyExistsException:
        print("Existing database is used.")

    try:
        # Get the destination table and validate the parameters with it.
        dst_table = glue.get_table(
            DatabaseName=user_params['DestinationDatabaseName'],
            Name=user_params['DestinationTableName'])
        if dst_table['Table']['StorageDescriptor']['Location'] != user_params[
                'OutputDataLocation']:
            err_msg = 'Location on the destination table is different from the OutputDataLocation.'
            raise ClientError(
                {
                    "Error": {
                        "Code": "InvalidInputException",
                        "Message": err_msg
                    }
                }, 'validate_params')
        print("Existing table is used.")
    except glue.exceptions.EntityNotFoundException:
        # Create the destination table if it does not exist
        glue.create_table(
            DatabaseName=user_params['DestinationDatabaseName'],
            TableInput={
                'Name': user_params['DestinationTableName'],
                'StorageDescriptor': {
                    'Location': user_params['OutputDataLocation']
                }
            })
        print("New table is created.")

    targets_destination = {
        "CatalogTargets": [{
            "DatabaseName": user_params['DestinationDatabaseName'],
            "Tables": [user_params['DestinationTableName']]
        }]
    }
    crawler_destination = Crawler(
        Name="{}_crawler_destination".format(workflow_name),
        Role=user_params['IAMRole'],
        Targets=targets_destination,
        SchemaChangePolicy={"DeleteBehavior": "LOG"},
        DependsOn={transform_job: "SUCCEEDED"})
    crawlers.append(crawler_destination)

    if user_params['Frequency']:
        if user_params['Frequency'] == 'Custom':
            schedule = user_params['FrequencyCronFormat']
        else:
            schedule = generate_schedule(user_params['Frequency'])
    else:
        schedule = None

    workflow = Workflow(
        Name=workflow_name,
        Entities=Entities(Jobs=jobs, Crawlers=crawlers),
        OnSchedule=schedule)
    return workflow
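# A hedged sketch of invoking the layout generator above. Every key is one
# the function actually reads; the concrete values are placeholders.
user_params = {
    'WorkflowName': 'compact_sales',
    'IAMRole': 'arn:aws:iam::123456789012:role/GlueBlueprintRole',
    'SourceDatabaseName': 'raw_db',
    'SourceTableName': 'sales',
    'InputDataLocation': 's3://example-bucket/raw/sales/',
    'InputDataFormat': 'json',
    'InputDataFormatOptions': '',
    'DestinationDatabaseName': 'curated_db',
    'DestinationTableName': 'sales',
    'OutputDataLocation': 's3://example-bucket/curated/sales/',
    'DesiredFileSizeMB': 128,
    'EnableSizeControl': 'true',
    'EnableManifest': False,
    'NumberOfWorkers': 5,
    'Frequency': None,
}
system_params = {'region': 'us-west-2', 'accountId': '123456789012'}

workflow = generate_layout(user_params, system_params)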