def xgboost_model():
    """Fixture: create a SageMaker Model custom resource.

    Yields:
        (reference, resource): the k8s CustomResourceReference and the CR
        as consumed by the controller.

    The resource is deleted best-effort on teardown (tests may have
    already deleted it).
    """
    resource_name = random_suffix_name("xgboost-model", 32)
    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = resource_name
    model = load_resource_file(
        SERVICE_NAME, "xgboost_model", additional_replacements=replacements)
    logging.debug(model)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL, resource_name,
        namespace="default")
    resource = k8s.create_custom_resource(reference, model)
    resource = k8s.wait_resource_consumed_by_controller(reference)
    assert resource is not None

    yield (reference, resource)

    # Delete the k8s resource if not already deleted by tests.
    # Best-effort: a missing resource is fine, but catch only Exception
    # so KeyboardInterrupt/SystemExit propagate (the old bare `except:`
    # swallowed them too).
    try:
        k8s.delete_custom_resource(reference)
    except Exception:
        pass
def kmeans_processing_job():
    """Fixture: create a SageMaker ProcessingJob custom resource.

    Yields:
        (reference, resource): the k8s CustomResourceReference and the CR
        as consumed by the controller.

    The resource is deleted best-effort on teardown (tests may have
    already deleted it).
    """
    resource_name = random_suffix_name("kmeans-processingjob", 32)
    replacements = REPLACEMENT_VALUES.copy()
    replacements["PROCESSING_JOB_NAME"] = resource_name
    processing_job = load_resource_file(
        SERVICE_NAME, "kmeans_processingjob",
        additional_replacements=replacements)
    logging.debug(processing_job)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL, resource_name,
        namespace="default")
    resource = k8s.create_custom_resource(reference, processing_job)
    resource = k8s.wait_resource_consumed_by_controller(reference)
    assert resource is not None

    yield (reference, resource)

    # Delete the k8s resource if not already deleted by tests.
    # Catch only Exception (not a bare `except:`) so interpreter-exit
    # signals still propagate out of the fixture teardown.
    try:
        k8s.delete_custom_resource(reference)
    except Exception:
        pass
def test_create_delete_non_public(self, amq_client):
    """Create a non-public RabbitMQ broker CR, verify it reaches RUNNING
    in AmazonMQ, then delete the CR and verify the broker is gone.
    """
    resource_name = "my-rabbit-broker-non-public"
    replacements = REPLACEMENT_VALUES.copy()
    replacements["BROKER_NAME"] = resource_name
    resource_data = load_resource_file(
        SERVICE_NAME,
        "broker_rabbitmq_non_public",
        additional_replacements=replacements,
    )
    # Routine fixture payload: log at DEBUG, not ERROR, consistent with
    # the other tests in this suite.
    logging.debug(resource_data)

    # Create the k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        RESOURCE_PLURAL,
        resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    broker_id = cr['status']['brokerID']

    # Let's check that the Broker appears in AmazonMQ
    aws_res = amq_client.describe_broker(BrokerId=broker_id)
    assert aws_res is not None

    now = datetime.datetime.now()
    timeout = now + datetime.timedelta(seconds=CREATE_TIMEOUT_SECONDS)

    # TODO(jaypipes): Move this into generic AWS-side waiter
    while aws_res['BrokerState'] != "RUNNING":
        if datetime.datetime.now() >= timeout:
            raise Exception("failed to find running Broker before timeout")
        time.sleep(CREATE_INTERVAL_SLEEP_SECONDS)
        aws_res = amq_client.describe_broker(BrokerId=broker_id)
        assert aws_res is not None

    # Delete the k8s resource on teardown of the module
    k8s.delete_custom_resource(ref)

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    # Broker should no longer appear in AmazonMQ
    res_found = False
    try:
        amq_client.describe_broker(BrokerId=broker_id)
        res_found = True
    except amq_client.exceptions.NotFoundException:
        pass

    assert res_found is False
def xgboost_hpojob():
    """Fixture: create an HPO-job custom resource and yield it.

    On teardown the CR is deleted only if the tests have not already
    removed it.
    """
    reference, spec_data = _make_hpojob()
    cr = k8s.create_custom_resource(reference, spec_data)
    cr = k8s.wait_resource_consumed_by_controller(reference)

    yield (reference, cr)

    still_exists = k8s.get_resource_exists(reference)
    if still_exists:
        k8s.delete_custom_resource(reference)
def xgboost_transformjob(sagemaker_client):
    """Fixture: create a boto3 SageMaker Model plus a TransformJob CR
    that references it.

    Yields:
        (reference, resource): the TransformJob CustomResourceReference
        and the CR as consumed by the controller.

    Teardown deletes both the CR and the boto3-created model,
    best-effort and independently of each other.
    """
    # Create model using boto3 for TransformJob
    transform_model_file = "s3://{d}/sagemaker/batch/model.tar.gz".format(
        d=get_bootstrap_resources().DataBucketName)
    model_name = random_suffix_name("xgboost-model", 32)
    create_response = sagemaker_client.create_model(
        ModelName=model_name,
        PrimaryContainer={
            'Image': REPLACEMENT_VALUES["XGBOOST_IMAGE_URI"],
            'ModelDataUrl': transform_model_file,
            'Environment': {}
        },
        ExecutionRoleArn=REPLACEMENT_VALUES["SAGEMAKER_EXECUTION_ROLE_ARN"])
    logging.debug(create_response)

    # Check if the model is created successfully
    describe_model_response = sagemaker_client.describe_model(
        ModelName=model_name)
    assert describe_model_response["ModelName"] is not None

    resource_name = random_suffix_name("xgboost-transformjob", 32)
    # Use the model created above
    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = model_name
    replacements["TRANSFORM_JOB_NAME"] = resource_name
    transformjob = load_resource_file(
        SERVICE_NAME, "xgboost_transformjob",
        additional_replacements=replacements)
    logging.debug(transformjob)

    # Create the k8s resource
    reference = k8s.CustomResourceReference(
        CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL, resource_name,
        namespace="default")
    resource = k8s.create_custom_resource(reference, transformjob)
    resource = k8s.wait_resource_consumed_by_controller(reference)
    assert resource is not None

    yield (reference, resource)

    # Best-effort teardown: catch only Exception (the old bare `except:`
    # also swallowed KeyboardInterrupt/SystemExit).
    try:
        # Delete the k8s resource if not already deleted by tests
        k8s.delete_custom_resource(reference)
    except Exception:
        pass
    try:
        # Delete the model created
        sagemaker_client.delete_model(ModelName=model_name)
    except Exception:
        pass
def rg_auth_token(make_rg_name, make_replication_group, rg_deletion_waiter, first_secret, second_secret):
    """Fixture: replication group created from the auth-token template.

    Yields (reference, resource); teardown deletes the CR and waits for
    the replication group to disappear server-side.
    """
    rg_values = {
        "RG_ID": make_rg_name("rg-auth-token"),
        "NAME": "first",
        "KEY": "secret1",
    }
    reference, resource = make_replication_group(
        "replicationgroup_authtoken", rg_values, rg_values["RG_ID"])

    yield (reference, resource)

    # teardown
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    # throws exception if wait fails
    rg_deletion_waiter.wait(ReplicationGroupId=rg_values["RG_ID"])
def single_variant_config():
    """Fixture: create a SageMaker Model CR and a single-variant
    EndpointConfig CR that references it.

    Yields:
        (config_reference, config_resource): the endpoint-config
        CustomResourceReference and CR.

    Teardown deletes each CR best-effort and independently, so a failure
    deleting the model no longer skips the config cleanup (the original
    single try-block aborted after the first failure).
    """
    config_resource_name = random_suffix_name("single-variant-config", 32)
    model_resource_name = config_resource_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["CONFIG_NAME"] = config_resource_name
    replacements["MODEL_NAME"] = model_resource_name

    model = load_resource_file(
        SERVICE_NAME, "xgboost_model", additional_replacements=replacements)
    logging.debug(model)

    config = load_resource_file(
        SERVICE_NAME,
        "endpoint_config_single_variant",
        additional_replacements=replacements,
    )
    logging.debug(config)

    # Create the k8s resources
    model_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        MODEL_RESOURCE_PLURAL,
        model_resource_name,
        namespace="default",
    )
    model_resource = k8s.create_custom_resource(model_reference, model)
    model_resource = k8s.wait_resource_consumed_by_controller(model_reference)
    assert model_resource is not None

    config_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config_resource_name,
        namespace="default",
    )
    config_resource = k8s.create_custom_resource(config_reference, config)
    config_resource = k8s.wait_resource_consumed_by_controller(
        config_reference)
    assert config_resource is not None

    yield (config_reference, config_resource)

    # Delete the k8s resources if not already deleted by tests.
    # Each delete is attempted in its own try so one failure does not
    # prevent the other cleanup; only Exception is caught so
    # KeyboardInterrupt/SystemExit propagate.
    try:
        k8s.delete_custom_resource(model_reference)
    except Exception:
        pass
    try:
        k8s.delete_custom_resource(config_reference)
    except Exception:
        pass
def rg_largecluster(rg_largecluster_input, make_replication_group, rg_deletion_waiter):
    """Fixture: replication group created from the large-cluster template.

    Yields (reference, resource); teardown deletes the CR and waits for
    server-side deletion.
    """
    rg_values = rg_largecluster_input
    reference, resource = make_replication_group(
        "replicationgroup_largecluster", rg_values, rg_values["RG_ID"])

    yield (reference, resource)

    # teardown
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=rg_values["RG_ID"])
def rg_cmd_fromsnapshot(bootstrap_resources, make_rg_name, make_replication_group, rg_deletion_waiter):
    """Fixture: cluster-mode-disabled replication group restored from the
    bootstrap snapshot.

    Yields (reference, resource); teardown deletes the CR and waits for
    server-side deletion.
    """
    rg_values = {
        "RG_ID": make_rg_name("rg-cmd-fromsnapshot"),
        "SNAPSHOT_NAME": bootstrap_resources.SnapshotName,
    }
    reference, resource = make_replication_group(
        "replicationgroup_cmd_fromsnapshot", rg_values, rg_values["RG_ID"])

    yield (reference, resource)

    # teardown
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    rg_deletion_waiter.wait(ReplicationGroupId=rg_values["RG_ID"])
def rg_input_coverage(bootstrap_resources, make_rg_name, make_replication_group, rg_deletion_waiter):
    """Fixture: replication group exercising broad input coverage (KMS,
    SNS, security group, user group) from bootstrap resources.

    Yields (reference, resource); teardown deletes the CR and waits for
    server-side deletion.
    """
    rg_values = {
        "RG_ID": make_rg_name("rg-input-coverage"),
        "KMS_KEY_ID": bootstrap_resources.KmsKeyID,
        "SNS_TOPIC_ARN": bootstrap_resources.SnsTopicARN,
        "SG_ID": bootstrap_resources.SecurityGroupID,
        "USERGROUP_ID": bootstrap_resources.UserGroupID,
    }
    reference, resource = make_replication_group(
        "replicationgroup_input_coverage", rg_values, rg_values["RG_ID"])

    yield (reference, resource)

    # teardown
    k8s.delete_custom_resource(reference)
    sleep(DEFAULT_WAIT_SECS)
    # throws exception if wait fails
    rg_deletion_waiter.wait(ReplicationGroupId=rg_values["RG_ID"])
def test_trainingjob_has_stopped_status(self, sagemaker_client, xgboost_trainingjob):
    """Deleting the TrainingJob CR must stop the SageMaker training job."""
    (reference, _) = xgboost_trainingjob
    resource = k8s.get_resource(reference)
    trainingjob_name = resource['spec'].get('trainingJobName', None)
    assert trainingjob_name is not None

    # Delete the k8s resource and verify the delete was acknowledged,
    # consistent with the sibling stop-status tests.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    # TODO: This sleep could be replaced by a wait loop but this is sufficient for now.
    time.sleep(5)

    current_trainingjob_status = self._get_sagemaker_trainingjob_status(
        sagemaker_client, trainingjob_name)
    expected_trainingjob_status_list = self._get_stopped_trainingjob_status_list(
    )
    assert current_trainingjob_status in expected_trainingjob_status_list
def test_config_is_deleted(self, sagemaker_client, single_variant_config):
    """Deleting the EndpointConfig CR must remove the config from SageMaker."""
    (reference, _) = single_variant_config
    cr = k8s.get_resource(reference)
    config_name = cr["spec"].get("endpointConfigName", None)

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    config_arn = self._get_sagemaker_endpoint_config_arn(
        sagemaker_client, config_name)
    assert config_arn is None
def test_model_is_deleted(self, sagemaker_client, xgboost_model):
    """Deleting the Model CR must remove the model from SageMaker."""
    (reference, _) = xgboost_model
    cr = k8s.get_resource(reference)
    model_name = cr["spec"].get("modelName", None)

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    model_arn = self._get_sagemaker_model_arn(sagemaker_client, model_name)
    assert model_arn is None
def snapshot_kms(ec_client, bootstrap_resources, make_snapshot):
    """Fixture: KMS-encrypted snapshot of the bootstrap snapshot's cache
    cluster.

    Yields (reference, resource); teardown deletes the CR and waits for
    the snapshot to disappear server-side.
    """
    describe_resp = ec_client.describe_snapshots(
        SnapshotName=bootstrap_resources.SnapshotName)
    cluster_id = describe_resp['Snapshots'][0]['CacheClusterId']

    snapshot_name = random_suffix_name("ack-snapshot-kms", 32)
    snapshot_values = {
        "SNAPSHOT_NAME": snapshot_name,
        "CC_ID": cluster_id,
        "KMS_KEY_ID": bootstrap_resources.KmsKeyID,
    }
    reference, resource = make_snapshot(
        "snapshot_kms", snapshot_values, snapshot_values['SNAPSHOT_NAME'])

    yield (reference, resource)

    # teardown
    k8s.delete_custom_resource(reference)
    assert wait_snapshot_deleted(snapshot_name)
def test_delete_endpoint(self, sagemaker_client, single_variant_xgboost_endpoint):
    """Deleting the Endpoint CR must remove the endpoint from SageMaker."""
    (reference, _, _, _) = single_variant_xgboost_endpoint
    cr = k8s.get_resource(reference)
    endpoint_name = cr["spec"].get("endpointName", None)

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    endpoint_desc = self._describe_sagemaker_endpoint(
        sagemaker_client, endpoint_name)
    assert endpoint_desc is None
def test_transformjob_has_stopped_status(self, sagemaker_client, xgboost_transformjob):
    """Deleting the TransformJob CR must stop the SageMaker transform job."""
    (reference, resource) = xgboost_transformjob
    transformjob_name = resource['spec'].get('transformJobName', None)
    assert transformjob_name is not None

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    current_status = self._get_sagemaker_transformjob_status(
        sagemaker_client, transformjob_name)
    stopped_statuses = self._get_stopped_transformjob_status_list()
    assert current_status in stopped_statuses
def test_processing_job_has_stopped_status(self, sagemaker_client, kmeans_processing_job):
    """Deleting the ProcessingJob CR must stop the SageMaker processing job."""
    (reference, _) = kmeans_processing_job
    cr = k8s.get_resource(reference)
    processing_job_name = cr["spec"].get("processingJobName", None)
    assert processing_job_name is not None

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    current_status = self._get_sagemaker_processing_job_status(
        sagemaker_client, processing_job_name)
    stopped_statuses = self._get_stopped_processing_job_status_list()
    assert current_status in stopped_statuses
def test_trainingjob_has_stopped_status(
    self, sagemaker_client, xgboost_trainingjob
):
    """Deleting the TrainingJob CR must stop the SageMaker training job."""
    (reference, _) = xgboost_trainingjob
    cr = k8s.get_resource(reference)
    trainingjob_name = cr["spec"].get("trainingJobName", None)
    assert trainingjob_name is not None

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    current_status = self._get_sagemaker_trainingjob_status(
        sagemaker_client, trainingjob_name
    )
    stopped_statuses = self._get_stopped_trainingjob_status_list()
    assert current_status in stopped_statuses
def test_create_hpo(self, xgboost_hpojob):
    """Creating the HPO-job CR must create the SageMaker tuning job, and
    deleting the CR must stop it."""
    (reference, resource) = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    hpo_job_name = resource["spec"].get("hyperParameterTuningJobName", None)
    assert hpo_job_name is not None

    sm_desc = get_sagemaker_hpo_job(hpo_job_name)
    assert k8s.get_resource_arn(resource) == sm_desc["HyperParameterTuningJobArn"]
    assert sm_desc["HyperParameterTuningJobStatus"] in HPO_JOB_STATUS_CREATED

    # Delete the k8s resource and confirm the controller acknowledged it.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    sm_desc = get_sagemaker_hpo_job(hpo_job_name)
    assert sm_desc["HyperParameterTuningJobStatus"] in HPO_JOB_STATUS_STOPPED
def single_variant_xgboost_endpoint():
    """Fixture: create a Model CR, two single-variant EndpointConfig CRs,
    and an Endpoint CR on the first config.

    Yields:
        (endpoint_reference, endpoint_resource, endpoint_spec,
         config2_resource_name): the endpoint reference/CR, the raw
        endpoint spec, and the name of the second (spare) config, which
        tests can use to exercise endpoint updates.

    All four CRs are deleted best-effort on teardown.
    """
    endpoint_resource_name = random_suffix_name("single-variant-endpoint", 32)
    config1_resource_name = endpoint_resource_name + "-config"
    model_resource_name = config1_resource_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["ENDPOINT_NAME"] = endpoint_resource_name
    replacements["CONFIG_NAME"] = config1_resource_name
    replacements["MODEL_NAME"] = model_resource_name

    model = load_resource_file(
        SERVICE_NAME, "xgboost_model", additional_replacements=replacements
    )
    logging.debug(model)

    config = load_resource_file(
        SERVICE_NAME,
        "endpoint_config_single_variant",
        additional_replacements=replacements,
    )
    logging.debug(config)

    endpoint_spec = load_resource_file(
        SERVICE_NAME, "endpoint_base", additional_replacements=replacements
    )
    logging.debug(endpoint_spec)

    # Create the k8s resources
    model_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        MODEL_RESOURCE_PLURAL,
        model_resource_name,
        namespace="default",
    )
    model_resource = k8s.create_custom_resource(model_reference, model)
    model_resource = k8s.wait_resource_consumed_by_controller(model_reference)
    assert model_resource is not None

    config1_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config1_resource_name,
        namespace="default",
    )
    config1_resource = k8s.create_custom_resource(config1_reference, config)
    config1_resource = k8s.wait_resource_consumed_by_controller(config1_reference)
    assert config1_resource is not None

    # Reuse the same config spec under a second name so tests can switch
    # the endpoint between configs.
    config2_resource_name = random_suffix_name("2-single-variant-endpoint", 32)
    config["metadata"]["name"] = config["spec"][
        "endpointConfigName"
    ] = config2_resource_name
    logging.debug(config)
    config2_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        CONFIG_RESOURCE_PLURAL,
        config2_resource_name,
        namespace="default",
    )
    config2_resource = k8s.create_custom_resource(config2_reference, config)
    config2_resource = k8s.wait_resource_consumed_by_controller(config2_reference)
    assert config2_resource is not None

    endpoint_reference = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        ENDPOINT_RESOURCE_PLURAL,
        endpoint_resource_name,
        namespace="default",
    )
    endpoint_resource = k8s.create_custom_resource(endpoint_reference, endpoint_spec)
    endpoint_resource = k8s.wait_resource_consumed_by_controller(endpoint_reference)
    assert endpoint_resource is not None

    yield (endpoint_reference, endpoint_resource, endpoint_spec, config2_resource_name)

    # Delete the k8s resources if not already deleted by tests.
    # Best-effort per resource; catch only Exception so
    # KeyboardInterrupt/SystemExit propagate (the old bare `except:`
    # swallowed them).
    for cr in (model_reference, config1_reference, config2_reference, endpoint_reference):
        try:
            k8s.delete_custom_resource(cr)
        except Exception:
            pass
def test_create_delete_2az(self, rds_client):
    """Create a two-AZ DB subnet group CR, verify it reaches Complete in
    RDS, then delete the CR and verify the subnet group is gone.
    """
    resource_name = "my-subnet-group"
    resource_desc = "my-subnet-group description"
    br_resources = get_bootstrap_resources()

    replacements = REPLACEMENT_VALUES.copy()
    replacements["DB_SUBNET_GROUP_NAME"] = resource_name
    replacements["DB_SUBNET_GROUP_DESC"] = resource_desc
    replacements["SUBNET_AZ1"] = br_resources.SubnetAZ1
    replacements["SUBNET_AZ2"] = br_resources.SubnetAZ2

    resource_data = load_resource_file(
        SERVICE_NAME,
        "db_subnet_group_2az",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create the k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        RESOURCE_PLURAL,
        resource_name,
        namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    # Let's check that the DB subnet group appears in RDS
    aws_res = rds_client.describe_db_subnet_groups(
        DBSubnetGroupName=resource_name)
    assert aws_res is not None
    assert len(aws_res['DBSubnetGroups']) == 1

    now = datetime.datetime.now()
    timeout = now + datetime.timedelta(seconds=CREATE_TIMEOUT_SECONDS)

    # TODO(jaypipes): Move this into generic AWS-side waiter
    while aws_res['DBSubnetGroups'][0]['SubnetGroupStatus'] != "Complete":
        if datetime.datetime.now() >= timeout:
            raise Exception(
                "failed to find DB subnet group in Complete status before timeout"
            )
        time.sleep(CREATE_INTERVAL_SLEEP_SECONDS)
        aws_res = rds_client.describe_db_subnet_groups(
            DBSubnetGroupName=resource_name)
        assert aws_res is not None
        assert len(aws_res['DBSubnetGroups']) == 1

    # Delete the k8s resource on teardown of the module
    k8s.delete_custom_resource(ref)

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    # DB subnet group should no longer appear in RDS. Use a found-flag
    # (as the other create/delete tests do) instead of `assert False`
    # inside the try, which would be silently stripped under `python -O`.
    res_found = False
    try:
        rds_client.describe_db_subnet_groups(
            DBSubnetGroupName=resource_name)
        res_found = True
    except rds_client.exceptions.DBSubnetGroupNotFoundFault:
        pass

    assert res_found is False