def sync_data_bucket(bucket):
    bucket_name = bucket.name
    region = get_region()
    s3_resource = boto3.resource("s3", region_name=region)
    source_bucket = s3_resource.Bucket(SAGEMAKER_SOURCE_DATA_BUCKET)
    destination_bucket = s3_resource.Bucket(bucket_name)

    temp_dir = "/tmp/ack_s3_data"
    # awscli is not installed in the test-infra container, hence use boto3 to copy in us-west-2
    if region == "us-west-2":
        duplicate_bucket_contents(source_bucket, destination_bucket)
        # above method does an async copy
        # TODO: find a way to remove random wait
        time.sleep(180)
    else:
        # workaround to copy if buckets are across regions
        # TODO: check if there is a better way and merge to test-infra
        subprocess.call(["mkdir", f"{temp_dir}"])
        subprocess.call([
            "aws", "s3", "sync",
            f"s3://{SAGEMAKER_SOURCE_DATA_BUCKET}",
            f"./{temp_dir}/",
            "--quiet",
        ])
        subprocess.call([
            "aws", "s3", "sync",
            f"./{temp_dir}/",
            f"s3://{bucket_name}",
            "--quiet",
        ])

    logging.info("Synced data bucket")
    return bucket
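# The fixed time.sleep(180) above papers over the asynchronous copy performed by
# duplicate_bucket_contents. A minimal sketch of one way to remove the arbitrary
# wait, assuming object count is an adequate completion signal for this test data
# set (wait_for_bucket_copy is a hypothetical helper, not part of the original code):
def wait_for_bucket_copy(source_bucket, destination_bucket,
                         timeout_seconds: int = 300, poll_seconds: int = 15):
    # Poll the destination bucket until it holds at least as many objects as the source.
    expected = sum(1 for _ in source_bucket.objects.all())
    copied = 0
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        copied = sum(1 for _ in destination_bucket.objects.all())
        if copied >= expected:
            return
        time.sleep(poll_seconds)
    raise TimeoutError(
        f"Bucket copy incomplete after {timeout_seconds}s: {copied}/{expected} objects")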
def create_subnet(vpc_id: str) -> str:
    region = identity.get_region()
    ec2 = boto3.client("ec2", region_name=region)

    resp = ec2.create_subnet(
        CidrBlock=VPC_SUBNET_CIDR_BLOCK,
        VpcId=vpc_id,
    )
    subnet_id = resp['Subnet']['SubnetId']

    # TODO(jaypipes): Put a proper waiter here...
    time.sleep(3)

    subnets = ec2.describe_subnets(SubnetIds=[subnet_id])
    if len(subnets['Subnets']) != 1:
        raise RuntimeError(
            f"failed to describe subnet we just created '{subnet_id}'",
        )

    subnet = subnets['Subnets'][0]
    subnet_state = subnet['State']
    if subnet_state != "available":
        raise RuntimeError(
            f"Subnet we just created '{subnet_id}' is not available. current state: {subnet_state}",
        )

    logging.info(f"Created VPC Subnet {subnet_id}")
    return subnet_id
def delete_subnet(subnet_id: str):
    region = get_region()
    ec2 = boto3.client("ec2", region_name=region)
    ec2.delete_subnet(SubnetId=subnet_id)
    logging.info(f"Deleted VPC Subnet {subnet_id}")
def create_vpc() -> str:
    region = get_region()
    ec2 = boto3.client("ec2", region_name=region)

    logging.debug(f"Creating VPC with CIDR {VPC_CIDR}")
    resp = ec2.create_vpc(CidrBlock=VPC_CIDR)
    vpc_id = resp['Vpc']['VpcId']

    # TODO(jaypipes): Put a proper waiter here...
    time.sleep(3)

    vpcs = ec2.describe_vpcs(VpcIds=[vpc_id])
    if len(vpcs['Vpcs']) != 1:
        raise RuntimeError(
            f"failed to describe VPC we just created '{vpc_id}'",
        )

    vpc = vpcs['Vpcs'][0]
    vpc_state = vpc['State']
    if vpc_state != "available":
        raise RuntimeError(
            f"VPC we just created '{vpc_id}' is not available. current state: {vpc_state}",
        )

    logging.info(f"Created VPC {vpc_id}")
    return vpc_id
def create_subnet(vpc_id: str, az_id: str, cidr: str) -> str:
    region = get_region()
    ec2 = boto3.client("ec2", region_name=region)

    logging.debug(f"Creating subnet with CIDR {cidr} in AZ {az_id}")
    resp = ec2.create_subnet(
        VpcId=vpc_id,
        AvailabilityZone=az_id,
        CidrBlock=cidr,
    )
    subnet_id = resp['Subnet']['SubnetId']

    # TODO(jaypipes): Put a proper waiter here...
    time.sleep(3)

    subnets = ec2.describe_subnets(SubnetIds=[subnet_id])
    if len(subnets['Subnets']) != 1:
        raise RuntimeError(
            f"failed to describe subnet we just created '{subnet_id}'",
        )

    subnet = subnets['Subnets'][0]
    subnet_state = subnet['State']
    if subnet_state != "available":
        raise RuntimeError(
            f"Subnet we just created '{subnet_id}' is not available. current state: {subnet_state}",
        )

    logging.info(f"Created VPC Subnet {subnet_id} in AZ {az_id}")
    return subnet_id
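# The TODO(jaypipes) notes above ask for a proper waiter instead of the fixed
# time.sleep(3). A minimal sketch, assuming the same boto3 EC2 client: botocore
# ships 'vpc_available' and 'subnet_available' waiters that poll describe_vpcs /
# describe_subnets until the resource reaches the "available" state.
# (wait_for_vpc_available / wait_for_subnet_available are hypothetical helpers.)
def wait_for_vpc_available(ec2_client, vpc_id: str):
    ec2_client.get_waiter("vpc_available").wait(
        VpcIds=[vpc_id],
        WaiterConfig={"Delay": 3, "MaxAttempts": 20},
    )


def wait_for_subnet_available(ec2_client, subnet_id: str):
    ec2_client.get_waiter("subnet_available").wait(
        SubnetIds=[subnet_id],
        WaiterConfig={"Delay": 3, "MaxAttempts": 20},
    )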
def detach_policies_and_delete_role(iam_role_name: str, iam_policy_arns: list):
    region = get_region()
    iam_client = boto3.client("iam", region_name=region)
    for iam_policy_arn in iam_policy_arns:
        iam_client.detach_role_policy(RoleName=iam_role_name, PolicyArn=iam_policy_arn)
    iam_client.delete_role(RoleName=iam_role_name)
    logging.info(f"Deleted role {iam_role_name}")
def delete_vpc(vpc_id: str):
    region = get_region()
    ec2 = boto3.client("ec2", region_name=region)
    ec2.delete_vpc(VpcId=vpc_id)
    logging.info(f"Deleted VPC {vpc_id}")
def delete_sqs_queue(queue_url: str):
    region = get_region()
    sqs_client = boto3.client('sqs', region_name=region)
    sqs_client.delete_queue(
        QueueUrl=queue_url,
    )
    logging.info(f"Deleted SQS queue {queue_url}")
def delete_db_subnet_group(db_subnet_group_name: str):
    region = get_region()
    rds = boto3.client("rds", region_name=region)
    rds.delete_db_subnet_group(DBSubnetGroupName=db_subnet_group_name)
    logging.info(f"Deleted DBSubnetGroup {db_subnet_group_name}")
def create_lambda_authorizer(authorizer_role_arn: str) -> str:
    region = get_region()
    lambda_client = boto3.client("lambda", region)
    try:
        lambda_client.get_function(FunctionName=AUTHORIZER_FUNCTION_NAME)
        raise RuntimeError(
            f"Expected {AUTHORIZER_FUNCTION_NAME} function to not exist. Did previous test cleanup"
            f" successfully?")
    except lambda_client.exceptions.ResourceNotFoundException:
        pass

    with tempfile.TemporaryDirectory() as tempdir:
        current_directory = os.path.dirname(os.path.realpath(__file__))
        # Package the authorizer handler into a zip archive
        with ZipFile(f'{tempdir}/index.zip', 'w') as index_zip:
            index_zip.write(f'{current_directory}/resources/index.js', 'index.js')

        # boto3 expects the raw zip bytes here; it handles any base64 encoding itself
        with open(f'{tempdir}/index.zip', 'rb') as f:
            zip_file_bytes = f.read()

        response = lambda_client.create_function(
            FunctionName=AUTHORIZER_FUNCTION_NAME,
            Role=authorizer_role_arn,
            Handler='index.handler',
            Runtime='nodejs12.x',
            Code={'ZipFile': zip_file_bytes})
        return response['FunctionArn']
def delete_dynamodb_table(table_name: str):
    region = get_region()
    ddb_client = boto3.client('dynamodb', region_name=region)
    ddb_client.delete_table(
        TableName=table_name,
    )
    logging.info(f"Deleted DynamoDB table {table_name}")
def create_data_bucket() -> str:
    region = get_region()
    account_id = get_account_id()
    bucket_name = resources.random_suffix_name(
        f"ack-data-bucket-{region}-{account_id}", 63)

    s3 = boto3.client("s3", region_name=region)
    # us-east-1 does not accept a LocationConstraint
    if region == "us-east-1":
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={"LocationConstraint": region})
    logging.info(f"Created SageMaker data bucket {bucket_name}")

    s3_resource = boto3.resource("s3", region_name=region)
    source_bucket = s3_resource.Bucket(SAGEMAKER_SOURCE_DATA_BUCKET)
    destination_bucket = s3_resource.Bucket(bucket_name)
    duplicate_bucket_contents(source_bucket, destination_bucket)
    logging.info("Synced data bucket")

    return bucket_name
def delete_authorizer_function(function_name: str):
    region = get_region()
    lambda_client = boto3.client("lambda", region_name=region)
    try:
        lambda_client.delete_function(FunctionName=function_name)
    except lambda_client.exceptions.ResourceNotFoundException:
        pass
def test_smoke(self, lambda_client):
    resource_name = random_suffix_name("lambda-csc", 24)

    resources = get_bootstrap_resources()
    logging.debug(resources)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["AWS_REGION"] = get_region()
    replacements["CODE_SIGNING_CONFIG_NAME"] = resource_name
    replacements["SIGNING_PROFILE_VERSION_ARN"] = resources.SigningProfileVersionArn

    # Load Lambda CR
    resource_data = load_lambda_resource(
        "code_signing_config",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL,
        resource_name, namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    codeSigningConfigARN = cr['status']['ackResourceMetadata']['arn']

    time.sleep(CREATE_WAIT_AFTER_SECONDS)

    # Check Lambda code signing config exists
    exists = self.code_signing_config_exists(lambda_client, codeSigningConfigARN)
    assert exists

    # Update cr
    cr["spec"]["description"] = "new description"

    # Patch k8s resource
    k8s.patch_custom_resource(ref, cr)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)

    # Check code signing config description
    csc = self.get_code_signing_config(lambda_client, codeSigningConfigARN)
    assert csc is not None
    assert csc["Description"] == "new description"

    # Delete k8s resource
    _, deleted = k8s.delete_custom_resource(ref)
    assert deleted

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    # Check Lambda code signing config doesn't exist
    exists = self.code_signing_config_exists(lambda_client, codeSigningConfigARN)
    assert not exists
def create_sqs_queue(queue_name: str) -> tuple:
    """Creates an SQS queue and returns (queue_arn, queue_url)."""
    region = get_region()
    sqs_resource = boto3.resource('sqs', region_name=region)

    logging.debug(f"Creating SQS queue {queue_name}")
    queue = sqs_resource.create_queue(
        QueueName=queue_name,
    )

    logging.info(f"Created SQS queue {queue_name}")
    return queue.attributes['QueueArn'], queue.url
def delete_data_bucket(bucket_name: str):
    region = get_region()
    s3_resource = boto3.resource("s3", region_name=region)
    bucket = s3_resource.Bucket(bucket_name)
    # Empty the bucket first; S3 refuses to delete non-empty buckets
    bucket.objects.all().delete()
    bucket.delete()
    logging.info(f"Deleted data bucket {bucket_name}")
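# delete_data_bucket assumes the bucket is unversioned; objects.all().delete()
# leaves old versions and delete markers behind if versioning was ever enabled.
# A sketch of a version-aware cleanup, using the same boto3 Bucket resource
# (empty_bucket_completely is a hypothetical helper, not part of the original code):
def empty_bucket_completely(bucket):
    # Remove current objects first, then any versions and delete markers,
    # so bucket.delete() cannot fail with BucketNotEmpty.
    bucket.objects.all().delete()
    bucket.object_versions.all().delete()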
def test_smoke(self, lambda_client, lambda_function):
    (_, function_resource) = lambda_function
    lambda_function_name = function_resource["spec"]["name"]

    resource_name = random_suffix_name("lambda-alias", 24)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["AWS_REGION"] = get_region()
    replacements["ALIAS_NAME"] = resource_name
    replacements["FUNCTION_NAME"] = lambda_function_name
    replacements["FUNCTION_VERSION"] = "$LATEST"

    # Load alias CR
    resource_data = load_lambda_resource(
        "alias",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL,
        resource_name, namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    time.sleep(CREATE_WAIT_AFTER_SECONDS)

    # Check alias exists
    alias = self.alias_exist(lambda_client, resource_name, lambda_function_name)
    assert alias is not None

    # Update cr
    cr["spec"]["description"] = ""

    # Patch k8s resource
    k8s.patch_custom_resource(ref, cr)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)

    # Check alias description
    alias = self.get_alias(lambda_client, resource_name, lambda_function_name)
    assert alias is not None
    assert alias["Description"] == ""

    # Delete k8s resource
    _, deleted = k8s.delete_custom_resource(ref)
    assert deleted

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    # Check alias doesn't exist
    exists = self.get_alias(lambda_client, resource_name, lambda_function_name)
    assert not exists
def clean_up_and_delete_bucket(bucket_name: str):
    region = get_region()
    s3_client = boto3.client("s3", region_name=region)
    # Paginate so empty buckets and buckets with more than 1000 objects are handled
    paginator = s3_client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get("Contents", []):
            s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
    s3_client.delete_bucket(
        Bucket=bucket_name,
    )
    logging.info(f"Deleted bucket {bucket_name}")
def create_db_subnet_group(db_subnet_group_name: str, subnet_az1_id: str, subnet_az2_id: str):
    region = get_region()
    rds = boto3.client("rds", region_name=region)

    logging.debug(f"Creating DBSubnetGroup with name {db_subnet_group_name}")
    rds.create_db_subnet_group(
        DBSubnetGroupName=db_subnet_group_name,
        DBSubnetGroupDescription='DBSubnetGroup for e2e testing of ACK rds-controller',
        SubnetIds=[subnet_az1_id, subnet_az2_id],
    )
    logging.info(f"Created DBSubnetGroup {db_subnet_group_name}")
def detach_policy_and_delete_role(iam_role_name: str, iam_policy_arn: str):
    region = get_region()
    iam_client = boto3.client("iam", region_name=region)
    try:
        iam_client.detach_role_policy(RoleName=iam_role_name, PolicyArn=iam_policy_arn)
    except iam_client.exceptions.NoSuchEntityException:
        pass
    try:
        iam_client.delete_role(RoleName=iam_role_name)
    except iam_client.exceptions.NoSuchEntityException:
        pass
def create_bucket(bucket_name: str):
    region = get_region()
    # Use a client rather than a resource so the service exceptions are
    # available via s3_client.exceptions below.
    s3_client = boto3.client("s3", region_name=region)

    logging.debug(f"Creating s3 data bucket {bucket_name}")
    try:
        # Note: us-east-1 does not accept a LocationConstraint; this helper
        # assumes the tests run in another region.
        s3_client.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={"LocationConstraint": region},
        )
    except s3_client.exceptions.BucketAlreadyExists:
        raise RuntimeError(f"Expected {bucket_name} bucket to not exist."
                           f" Did previous test cleanup successfully?")
    logging.info(f"Created bucket {bucket_name}")
def service_bootstrap() -> dict:
    logging.getLogger().setLevel(logging.INFO)

    region = get_region()

    vpc_id = create_vpc()

    az1 = f"{region}a"
    subnet_az1_id = create_subnet(vpc_id, az1, SUBNET_AZ1_CIDR)
    az2 = f"{region}b"
    subnet_az2_id = create_subnet(vpc_id, az2, SUBNET_AZ2_CIDR)

    return TestBootstrapResources(
        vpc_id,
        subnet_az1_id,
        subnet_az2_id,
    ).__dict__
def test_smoke(self, dynamodb_client, dynamodb_table):
    (_, table_resource) = dynamodb_table

    # Global Tables must have the same name as the DynamoDB Tables they replicate
    global_table_name = table_resource["spec"]["tableName"]

    replacements = REPLACEMENT_VALUES.copy()
    replacements["REGION_NAME"] = get_region()
    replacements["TABLE_NAME"] = global_table_name
    replacements["GLOBAL_TABLE_NAME"] = global_table_name

    # Load Global Table CR
    resource_data = load_dynamodb_resource(
        "global_table",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL,
        global_table_name, namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    wait_for_cr_status(
        ref,
        "globalTableStatus",
        "ACTIVE",
        10,
        5,
    )

    # Check DynamoDB Global Table exists
    exists = self.global_table_exists(dynamodb_client, global_table_name)
    assert exists

    _, deleted = k8s.delete_custom_resource(ref)
    assert deleted is True

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    exists = self.global_table_exists(dynamodb_client, global_table_name)
    assert not exists
def get_resource_tags(resource_arn: str):
    region = get_region()
    ddb_client = boto3.client('dynamodb', region_name=region)

    tags = []
    next_token = None
    while True:
        # Only include NextToken once the API has actually returned one
        kwargs = {"ResourceArn": resource_arn}
        if next_token:
            kwargs["NextToken"] = next_token
        resp = ddb_client.list_tags_of_resource(**kwargs)
        tags += resp['Tags']
        next_token = resp.get('NextToken')
        if not next_token:
            break
    return tags
def create_data_bucket() -> str:
    region = get_region()
    account_id = get_account_id()
    bucket_name = resources.random_suffix_name(
        f"ack-data-bucket-{region}-{account_id}", 63)

    s3 = boto3.client("s3", region_name=region)
    # us-east-1 does not accept a LocationConstraint
    if region == "us-east-1":
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={"LocationConstraint": region})
    logging.info(f"Created SageMaker data bucket {bucket_name}")

    s3_resource = boto3.resource("s3", region_name=region)
    source_bucket = s3_resource.Bucket(SAGEMAKER_SOURCE_DATA_BUCKET)
    destination_bucket = s3_resource.Bucket(bucket_name)

    temp_dir = "/tmp/ack_s3_data"
    # awscli is not installed in the test-infra container, hence use boto3 to copy in us-west-2
    if region == "us-west-2":
        duplicate_bucket_contents(source_bucket, destination_bucket)
        # above method does an async copy
        # TODO: find a way to remove random wait
        time.sleep(180)
    else:
        # workaround to copy if buckets are across regions
        # TODO: check if there is a better way and merge to test-infra
        subprocess.call(["mkdir", f"{temp_dir}"])
        subprocess.call([
            "aws", "s3", "sync",
            f"s3://{SAGEMAKER_SOURCE_DATA_BUCKET}",
            f"./{temp_dir}/",
            "--quiet",
        ])
        subprocess.call([
            "aws", "s3", "sync",
            f"./{temp_dir}/",
            f"s3://{bucket_name}",
            "--quiet",
        ])

    logging.info("Synced data bucket")
    return bucket_name
def authorizer_resource(api_resource):
    random_suffix = (''.join(
        random.choice(string.ascii_lowercase) for _ in range(6)))
    authorizer_resource_name = test_resource_values[
        'AUTHORIZER_NAME'] + f'-{random_suffix}'
    test_resource_values['AUTHORIZER_NAME'] = authorizer_resource_name
    authorizer_uri = f'arn:aws:apigateway:{get_region()}:lambda:path/2015-03-31/functions/{get_bootstrap_resources().AuthorizerFunctionArn}/invocations'
    test_resource_values["AUTHORIZER_URI"] = authorizer_uri
    authorizer_ref, authorizer_data = helper.authorizer_ref_and_data(
        authorizer_resource_name=authorizer_resource_name,
        replacement_values=test_resource_values)

    if k8s.get_resource_exists(authorizer_ref):
        raise Exception(
            f"expected {authorizer_resource_name} to not exist. Did previous test cleanup?"
        )

    logging.debug(
        f"apigatewayv2 authorizer resource. name: {authorizer_resource_name}, data: {authorizer_data}"
    )

    k8s.create_custom_resource(authorizer_ref, authorizer_data)
    cr = k8s.wait_resource_consumed_by_controller(authorizer_ref)

    assert cr is not None
    assert k8s.get_resource_exists(authorizer_ref)

    authorizer_id = cr['status']['authorizerID']
    test_resource_values['AUTHORIZER_ID'] = authorizer_id

    # add permissions for apigateway to invoke authorizer lambda
    authorizer_arn = "arn:aws:execute-api:{region}:{account}:{api_id}/authorizers/{authorizer_id}".format(
        region=get_region(),
        account=get_account_id(),
        api_id=test_resource_values['API_ID'],
        authorizer_id=authorizer_id)
    lambda_client = boto3.client("lambda")
    lambda_client.add_permission(
        FunctionName=get_bootstrap_resources().AuthorizerFunctionName,
        StatementId=f'apigatewayv2-authorizer-invoke-permissions-{random_suffix}',
        Action='lambda:InvokeFunction',
        Principal='apigateway.amazonaws.com',
        SourceArn=authorizer_arn)

    yield authorizer_ref, cr

    k8s.delete_custom_resource(authorizer_ref)
def create_execution_role() -> str:
    region = get_region()
    role_name = resources.random_suffix_name("ack-sagemaker-execution-role", 63)
    iam = boto3.client("iam", region_name=region)

    iam.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps({
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "Service": "sagemaker.amazonaws.com"
                },
                "Action": "sts:AssumeRole",
            }],
        }),
        Description="SageMaker execution role for ACK integration and canary tests",
    )

    # random sleep to prevent throttling
    time.sleep(random.randrange(1, 3))
    iam.attach_role_policy(
        RoleName=role_name,
        PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess",
    )
    # random sleep to prevent throttling
    time.sleep(random.randrange(1, 3))
    iam.attach_role_policy(
        RoleName=role_name,
        PolicyArn="arn:aws:iam::aws:policy/AmazonS3FullAccess")

    iam_resource = iam.get_role(RoleName=role_name)
    resource_arn = iam_resource["Role"]["Arn"]

    # There appears to be a delay in role availability after role creation
    # resulting in failure that role is not present. So adding a delay
    # to allow for the role to become available
    time.sleep(10)
    logging.info(f"Created SageMaker execution role {resource_arn}")
    return resource_arn
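# The fixed sleeps in create_execution_role work around IAM eventual consistency.
# A sketch of a slightly tighter alternative, assuming the same boto3 IAM client:
# the client ships a 'role_exists' waiter, though it only confirms GetRole succeeds;
# cross-service propagation (e.g. SageMaker assuming the role) can still lag briefly.
# (wait_for_role is a hypothetical helper, not part of the original code.)
def wait_for_role(iam_client, role_name: str):
    iam_client.get_waiter("role_exists").wait(
        RoleName=role_name,
        WaiterConfig={"Delay": 2, "MaxAttempts": 10},
    )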
def service_bootstrap() -> dict:
    logging.getLogger().setLevel(logging.INFO)

    region = get_region()

    vpc_id = create_vpc()

    az1 = f"{region}a"
    subnet_az1_id = create_subnet(vpc_id, az1, SUBNET_AZ1_CIDR)
    az2 = f"{region}b"
    subnet_az2_id = create_subnet(vpc_id, az2, SUBNET_AZ2_CIDR)

    db_subnet_group_name = random_suffix_name("ack-test-subnet-group", 30)
    create_db_subnet_group(db_subnet_group_name, subnet_az1_id, subnet_az2_id)

    return TestBootstrapResources(
        vpc_id,
        subnet_az1_id,
        subnet_az2_id,
        db_subnet_group_name,
    ).__dict__
def ensure_signing_profile(signing_profile_name: str, platform_id: str) -> str:
    region = get_region()
    signer_client = boto3.client("signer", region_name=region)

    # Signing profiles cannot be deleted. We just reuse the same signing profile
    # for ACK lambda controller e2e tests.
    try:
        resp = signer_client.get_signing_profile(
            profileName=signing_profile_name,
        )
        return resp['profileVersionArn']
    except signer_client.exceptions.ResourceNotFoundException:
        resp = signer_client.put_signing_profile(
            profileName=signing_profile_name,
            platformId=platform_id,
        )
        logging.info(f"Created signing profile {signing_profile_name}")
        return resp['profileVersionArn']
def lambda_function():
    resource_name = random_suffix_name("lambda-function", 24)
    resources = get_bootstrap_resources()

    replacements = REPLACEMENT_VALUES.copy()
    replacements["FUNCTION_NAME"] = resource_name
    replacements["BUCKET_NAME"] = resources.FunctionsBucketName
    replacements["LAMBDA_ROLE"] = resources.LambdaESMRoleARN
    replacements["LAMBDA_FILE_NAME"] = resources.LambdaFunctionFileZip
    replacements["RESERVED_CONCURRENT_EXECUTIONS"] = "0"
    replacements["CODE_SIGNING_CONFIG_ARN"] = ""
    replacements["AWS_REGION"] = get_region()

    # Load function CR
    resource_data = load_lambda_resource(
        "function",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create k8s resource
    function_reference = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, "functions",
        resource_name, namespace="default",
    )

    # Create lambda function
    k8s.create_custom_resource(function_reference, resource_data)
    function_resource = k8s.wait_resource_consumed_by_controller(
        function_reference)

    assert function_resource is not None
    assert k8s.get_resource_exists(function_reference)

    time.sleep(CREATE_WAIT_AFTER_SECONDS)

    yield (function_reference, function_resource)

    _, deleted = k8s.delete_custom_resource(function_reference)
    assert deleted