def xgboost_transformjob(xgboost_model_for_transform):
    """Create a transform job resource and remove it on teardown.

    The incoming fixture value is a pair of (transform job resource name,
    model resource name). The resource is created from the
    ``xgboost_transformjob`` spec file and must expose an ARN before the
    ``(reference, resource)`` pair is yielded.
    """
    transform_resource_name, model_resource_name = xgboost_model_for_transform

    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = model_resource_name
    replacements["TRANSFORM_JOB_NAME"] = transform_resource_name

    reference, _spec, resource = create_sagemaker_resource(
        resource_plural=RESOURCE_PLURAL,
        resource_name=transform_resource_name,
        spec_file="xgboost_transformjob",
        replacements=replacements,
    )

    assert resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) is not None

    yield (reference, resource)

    # Teardown: nothing to do when the test already deleted the resource.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
def multi_variant_config(name_suffix, single_container_model):
    """Create a multi-variant endpoint config and delete it on teardown.

    The config name is ``name_suffix`` plus ``-multi-variant-config``; the
    model name is read from the spec of the resource supplied by
    ``single_container_model``. Yields ``(config_reference, config_resource)``.
    """
    endpoint_config_name = name_suffix + "-multi-variant-config"
    _, model_resource = single_container_model
    model_name = model_resource["spec"].get("modelName", None)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["ENDPOINT_CONFIG_NAME"] = endpoint_config_name
    replacements["MODEL_NAME"] = model_name

    config_reference, _config_spec, config_resource = create_sagemaker_resource(
        resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
        resource_name=endpoint_config_name,
        spec_file="endpoint_config_multi_variant",
        replacements=replacements,
    )

    assert config_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(config_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {config_resource['status']}"
        )
    assert k8s.get_resource_arn(config_resource) is not None

    yield (config_reference, config_resource)

    # Teardown: the config is always deleted here (no existence check).
    _, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
    assert deleted
def xgboost_model_package_group():
    """Create a model package group resource and delete it on teardown.

    A random-suffixed name is generated, the resource is created from the
    ``xgboost_model_package_group`` spec file, and the
    ``(reference, resource)`` pair is yielded once an ARN is present.
    """
    group_name = random_suffix_name("xgboost-model-package-group", 38)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_PACKAGE_GROUP_NAME"] = group_name

    created = create_sagemaker_resource(
        resource_plural=cfg.MODEL_PACKAGE_GROUP_RESOURCE_PLURAL,
        resource_name=group_name,
        spec_file="xgboost_model_package_group",
        replacements=replacements,
    )
    (
        model_package_group_reference,
        _model_package_group_spec,
        model_package_group_resource,
    ) = created

    assert model_package_group_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(model_package_group_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {model_package_group_resource['status']}"
        )
    assert k8s.get_resource_arn(model_package_group_resource) is not None

    yield (model_package_group_reference, model_package_group_resource)

    # Delete the k8s resource if not already deleted by tests
    if not k8s.get_resource_exists(model_package_group_reference):
        return
    _, deleted = k8s.delete_custom_resource(
        model_package_group_reference,
        DELETE_WAIT_PERIOD,
        DELETE_WAIT_LENGTH,
    )
    assert deleted
def xgboost_training_job():
    """Create a training job resource and delete it on teardown.

    The job gets a random-suffixed name and is created from the
    ``xgboost_trainingjob`` spec file. Yields ``(reference, resource)``
    after asserting that the resource carries an ARN.
    """
    training_job_name = random_suffix_name("xgboost-trainingjob", 32)

    replacements = REPLACEMENT_VALUES.copy()
    replacements["TRAINING_JOB_NAME"] = training_job_name

    reference, _spec, resource = create_sagemaker_resource(
        resource_plural=RESOURCE_PLURAL,
        resource_name=training_job_name,
        spec_file="xgboost_trainingjob",
        replacements=replacements,
    )

    assert resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) is not None

    yield (reference, resource)

    # Teardown: nothing to do when the test already deleted the resource.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
def test_versioned_model_package_completed(
        self, xgboost_versioned_model_package):
    """Exercise create, update and delete of a versioned model package.

    Checks the ARN and the InProgress -> Completed status transitions,
    patches the approval status and description, verifies them against both
    SageMaker and the k8s resource, and finally deletes the resource.
    """
    reference, spec, resource = xgboost_versioned_model_package
    assert k8s.get_resource_exists(reference)

    group_name = resource["spec"].get("modelPackageGroupName")
    # Model package name for Versioned Model packages is the ARN of the resource
    listing = sagemaker_client().list_model_packages(
        ModelPackageGroupName=group_name
    )
    model_package_name = listing["ModelPackageSummaryList"][0]["ModelPackageArn"]
    model_package_desc = get_sagemaker_model_package(model_package_name)

    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) == model_package_name

    # Status goes InProgress (not synced) -> Completed (synced).
    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_INPROGRESS
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # Update the resource
    new_model_approval_status = "Approved"
    approval_description = "Approved modelpackage"
    spec["spec"]["modelApprovalStatus"] = new_model_approval_status
    spec["spec"]["approvalDescription"] = approval_description
    k8s.patch_custom_resource(reference, spec)
    resource = k8s.wait_resource_consumed_by_controller(reference)
    assert resource is not None

    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # The update must be visible on the SageMaker side ...
    model_package_desc = get_sagemaker_model_package(model_package_name)
    assert model_package_desc["ModelApprovalStatus"] == new_model_approval_status
    assert model_package_desc["ApprovalDescription"] == approval_description

    # ... and in the spec of the k8s resource.
    patched_spec = resource["spec"]
    assert patched_spec.get("modelApprovalStatus", None) == new_model_approval_status
    assert patched_spec.get("approvalDescription", None) == approval_description

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, DELETE_WAIT_PERIOD, DELETE_WAIT_LENGTH
    )
    assert deleted is True
    assert get_sagemaker_model_package(model_package_name) is None
def faulty_config(name_suffix, single_container_model):
    """Create an endpoint config backed by a model whose S3 data was removed.

    The model artifact is copied to a temporary S3 key, a model resource is
    created pointing at that key, and the key is deleted again. An endpoint
    config is then created from the ``endpoint_config_multi_variant`` spec
    with the same replacements. Yields ``(config_reference, config_resource)``
    and deletes both the model and the config on teardown.
    """
    replacements = REPLACEMENT_VALUES.copy()

    # copy model data to a temp S3 location and delete it after model is created on SageMaker
    model_bucket = replacements["SAGEMAKER_DATA_BUCKET"]
    copy_source = {
        "Bucket": model_bucket,
        "Key": "sagemaker/model/xgboost-mnist-model.tar.gz",
    }
    model_destination_key = "sagemaker/model/delete/xgboost-mnist-model.tar.gz"
    s3.copy_object(model_bucket, copy_source, model_destination_key)

    faulty_model_name = name_suffix + "faulty-model"
    replacements["MODEL_NAME"] = faulty_model_name
    replacements["MODEL_LOCATION"] = f"s3://{model_bucket}/{model_destination_key}"

    model_reference, _model_spec, model_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_RESOURCE_PLURAL,
        resource_name=faulty_model_name,
        spec_file="xgboost_model_with_model_location",
        replacements=replacements,
    )
    assert model_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(model_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {model_resource['status']}"
        )
    assert k8s.get_resource_arn(model_resource) is not None

    # The model exists now; remove the temporary artifact it points at.
    s3.delete_object(model_bucket, model_destination_key)

    config_name = name_suffix + "-faulty-config"
    # NOTE(review): the model name read from single_container_model below is
    # never used afterwards; replacements["MODEL_NAME"] still holds the
    # faulty model name. Confirm whether this lookup is intentional.
    _, model_resource = single_container_model
    _unused_model_name = model_resource["spec"].get("modelName", None)
    replacements["ENDPOINT_CONFIG_NAME"] = config_name

    config_reference, _config_spec, config_resource = create_sagemaker_resource(
        resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
        resource_name=config_name,
        spec_file="endpoint_config_multi_variant",
        replacements=replacements,
    )
    assert config_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(config_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {config_resource['status']}"
        )
    assert k8s.get_resource_arn(config_resource) is not None

    yield (config_reference, config_resource)

    # Teardown: delete the model first, then the config.
    for cr in (model_reference, config_reference):
        _, deleted = k8s.delete_custom_resource(cr, 3, 10)
        assert deleted
def test_smoke(
    self, sagemaker_client, xgboost_churn_model_explainability_job_definition
):
    """Smoke-test a model explainability job definition.

    Verifies that the resource ARN matches the ARN reported by SageMaker,
    that tags are in sync, and that deleting the k8s resource removes the
    job definition from SageMaker.
    """
    reference, resource = xgboost_churn_model_explainability_job_definition
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    job_definition_name = spec.get("jobDefinitionName")
    description = get_sagemaker_model_explainability_job_definition(
        sagemaker_client, job_definition_name
    )
    job_definition_arn = description["JobDefinitionArn"]
    assert k8s.get_resource_arn(resource) == job_definition_arn

    # random sleep before we check for tags to reduce test flakiness
    time.sleep(cfg.TAG_DELAY_SLEEP)
    assert_tags_in_sync(job_definition_arn, spec.get("tags", None))

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference, 3, 10)
    assert deleted

    remaining = get_sagemaker_model_explainability_job_definition(
        sagemaker_client, job_definition_name
    )
    assert remaining is None
def test_create_model_package_group(self, xgboost_model_package_group):
    """Verify creation and deletion of a model package group.

    Checks the ARN against SageMaker, waits for the Completed status and
    the synced condition, compares tags, then deletes the resource and
    confirms SageMaker no longer returns the group.
    """
    reference, resource = xgboost_model_package_group
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    group_name = spec.get("modelPackageGroupName", None)
    assert group_name is not None

    group_desc = get_sagemaker_model_package_group(group_name)
    group_arn = group_desc["ModelPackageGroupArn"]
    assert k8s.get_resource_arn(resource) == group_arn

    self._assert_model_package_group_status_in_sync(
        group_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(group_arn, spec.get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
    )
    assert deleted is True
    assert get_sagemaker_model_package_group(group_name) is None
def test_stopped(self, xgboost_training_job):
    """Verify that deleting an in-progress training job stops it.

    The job must be InProgress in SageMaker and in the resource status;
    after the k8s resource is deleted, the SageMaker status must be one of
    ``cfg.LIST_JOB_STATUS_STOPPED``.
    """
    reference, resource = xgboost_training_job
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("trainingJobName", None)
    assert job_name is not None

    description = get_sagemaker_training_job(job_name)
    assert k8s.get_resource_arn(resource) == description["TrainingJobArn"]
    assert description["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(job_name, reference, cfg.JOB_STATUS_INPROGRESS)

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True

    description = get_sagemaker_training_job(job_name)
    assert description["TrainingJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
def test_completed(self, xgboost_training_job):
    """Verify that a training job runs to completion and can be deleted.

    Checks the ARN against SageMaker, follows the job from InProgress
    (not synced) to Completed (synced), asserts that the model artifact
    URL is populated in the resource status, compares tags, and finally
    deletes the completed resource.
    """
    (reference, resource) = xgboost_training_job
    assert k8s.get_resource_exists(reference)

    training_job_name = resource["spec"].get("trainingJobName", None)
    assert training_job_name is not None

    training_job_desc = get_sagemaker_training_job(training_job_name)
    training_job_arn = training_job_desc["TrainingJobArn"]
    assert k8s.get_resource_arn(resource) == training_job_arn
    assert training_job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(
        training_job_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # model artifact URL is populated
    # The original comparison was a bare expression whose result was
    # discarded; it is now actually asserted.
    resource = k8s.get_resource(reference)
    assert resource["status"]["modelArtifacts"]["s3ModelArtifacts"] is not None

    resource_tags = resource["spec"].get("tags", None)
    assert_tags_in_sync(training_job_arn, resource_tags)

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True
def test_stopped(self, xgboost_hpojob):
    """Verify that deleting an in-progress HPO job stops it.

    The tuning job must be InProgress in SageMaker and in the resource
    status; after the k8s resource is deleted, the SageMaker status must be
    one of ``cfg.LIST_JOB_STATUS_STOPPED``.
    """
    reference, resource = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    hpo_job_name = resource["spec"].get("hyperParameterTuningJobName", None)
    assert hpo_job_name is not None

    description = get_sagemaker_hpo_job(hpo_job_name)
    assert k8s.get_resource_arn(resource) == description["HyperParameterTuningJobArn"]
    assert description["HyperParameterTuningJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_hpo_status_in_sync(
        hpo_job_name, reference, cfg.JOB_STATUS_INPROGRESS
    )

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True

    description = get_sagemaker_hpo_job(hpo_job_name)
    assert description["HyperParameterTuningJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
def test_create_feature_group(self, feature_group):
    """Tests that a feature group can be created and deleted
    using the Feature Group Controller.

    The group is followed from the creating status (not synced) to the
    created status (synced), its tags are compared, and after deletion
    SageMaker must no longer return it.
    """
    reference, resource = feature_group
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    group_name = spec.get("featureGroupName", None)
    assert group_name is not None

    description = get_sagemaker_feature_group(group_name)
    feature_group_arn = description["FeatureGroupArn"]
    assert k8s.get_resource_arn(resource) == feature_group_arn
    assert description["FeatureGroupStatus"] == FEATURE_GROUP_STATUS_CREATING
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_feature_group_status_in_sync(
        group_name, reference, FEATURE_GROUP_STATUS_CREATED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(feature_group_arn, spec.get("tags", None))

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(
        reference, WAIT_PERIOD_COUNT, WAIT_PERIOD_LENGTH
    )
    assert deleted
    assert get_sagemaker_feature_group(group_name) is None
def test_completed(self, xgboost_hpojob):
    """Verify that an HPO job runs to completion and can be deleted.

    Checks the ARN against SageMaker, follows the job from InProgress
    (not synced) to Completed (synced), compares tags, and deletes the
    completed resource.
    """
    reference, resource = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    hpo_job_name = spec.get("hyperParameterTuningJobName", None)
    assert hpo_job_name is not None

    description = get_sagemaker_hpo_job(hpo_job_name)
    hpo_arn = description["HyperParameterTuningJobArn"]
    assert k8s.get_resource_arn(resource) == hpo_arn
    assert description["HyperParameterTuningJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_hpo_status_in_sync(
        hpo_job_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(hpo_arn, spec.get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True
def test_completed(self, kmeans_processing_job):
    """Verify that a processing job runs to completion and can be deleted.

    Checks the ARN against SageMaker, follows the job from InProgress
    (not synced) to Completed (synced), compares tags, and deletes the
    completed resource.
    """
    reference, resource = kmeans_processing_job
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    job_name = spec.get("processingJobName", None)
    assert job_name is not None

    description = get_sagemaker_processing_job(job_name)
    processing_job_arn = description["ProcessingJobArn"]
    assert k8s.get_resource_arn(resource) == processing_job_arn
    assert description["ProcessingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_processing_status_in_sync(
        job_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(processing_job_arn, spec.get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True
def user_profile_fixture(domain_fixture):
    """Create a user profile inside the domain from ``domain_fixture``.

    Before the profile is applied, the domain must be ``InService``, its
    kernel instance is patched to ``ml.t3.large``, and the fixture waits
    for that value and for the domain to be ``InService`` again. Yields the
    domain triple followed by the user profile triple and deletes the
    profile on teardown if it still exists.
    """
    domain_reference, domain_resource, domain_spec = domain_fixture
    assert k8s.get_resource_exists(domain_reference)

    domain_id = domain_resource["status"].get("domainID", None)
    assert domain_id is not None
    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    # Patch the kernel instance type and wait until the change is visible.
    domain_resource = patch_domain_kernel_instance(
        domain_reference, domain_spec, "ml.t3.large"
    )
    wait_for_status("ml.t3.large", 10, 30, get_domain_kernel_instance, domain_id)
    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    profile_name = random_suffix_name("profile", 15)
    applied = apply_user_profile_yaml(profile_name, domain_id)
    user_profile_reference, user_profile_resource, user_profile_spec = applied

    assert user_profile_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(user_profile_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {user_profile_resource['status']}"
        )
    assert k8s.get_resource_arn(user_profile_resource) is not None

    yield (
        domain_reference,
        domain_resource,
        domain_spec,
        user_profile_reference,
        user_profile_resource,
        user_profile_spec,
    )

    # Teardown: nothing to do when the test already deleted the profile.
    if not k8s.get_resource_exists(user_profile_reference):
        return
    _, deleted = k8s.delete_custom_resource(
        user_profile_reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
def domain_fixture():
    """Apply a domain resource and delete it on teardown.

    The domain gets a random-suffixed name. Yields
    ``(reference, resource, spec)`` after asserting that the resource
    carries an ARN.
    """
    domain_name = random_suffix_name("sm-domain", 15)
    reference, resource, spec = apply_domain_yaml(domain_name)

    assert resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) is not None

    yield (reference, resource, spec)

    # Teardown: nothing to do when the test already deleted the domain.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
def test_smoke(self, dynamodb_client, dynamodb_table):
    """Smoke-test creation and deletion of a DynamoDB backup resource.

    A backup custom resource is created for the table from
    ``dynamodb_table``, the test waits for ``backupStatus`` to become
    ``AVAILABLE``, checks that the backup exists, deletes the resource,
    and checks that the backup is gone.
    """
    _, table_resource = dynamodb_table
    backup_name = random_suffix_name("backup", 32)
    table_name = table_resource["spec"]["tableName"]

    replacements = REPLACEMENT_VALUES.copy()
    replacements["TABLE_NAME"] = table_name
    replacements["BACKUP_NAME"] = backup_name

    # Load Backup CR
    resource_data = load_dynamodb_resource(
        "backup",
        additional_replacements=replacements,
    )
    logging.debug(resource_data)

    # Create k8s resource
    ref = k8s.CustomResourceReference(
        CRD_GROUP,
        CRD_VERSION,
        RESOURCE_PLURAL,
        backup_name,
        namespace="default",
    )
    k8s.create_custom_resource(ref, resource_data)
    cr = k8s.wait_resource_consumed_by_controller(ref)

    assert cr is not None
    assert k8s.get_resource_exists(ref)

    wait_for_cr_status(ref, "backupStatus", "AVAILABLE", 10, 5)

    backup_arn = k8s.get_resource_arn(cr)

    # Check DynamoDB Backup exists
    assert self.backup_exists(dynamodb_client, backup_arn)

    # Delete k8s resource
    _, deleted = k8s.delete_custom_resource(ref)
    assert deleted is True

    time.sleep(DELETE_WAIT_AFTER_SECONDS)

    # Check DynamoDB Backup no longer exists
    assert not self.backup_exists(dynamodb_client, backup_arn)
def test_processing_job_has_correct_arn(self, sagemaker_client,
                                        kmeans_processing_job):
    """Check that the resource ARN matches the processing job ARN
    obtained through ``_get_sagemaker_processing_job_arn``.
    """
    reference, _ = kmeans_processing_job
    resource = k8s.get_resource(reference)

    job_name = resource["spec"].get("processingJobName", None)
    assert job_name is not None

    actual_arn = k8s.get_resource_arn(resource)
    expected_arn = self._get_sagemaker_processing_job_arn(
        sagemaker_client, job_name
    )
    assert actual_arn == expected_arn
def test_trainingjob_has_correct_arn(self, sagemaker_client,
                                     xgboost_trainingjob):
    """Check that the resource ARN matches the training job ARN
    obtained through ``_get_sagemaker_trainingjob_arn``.
    """
    reference, _ = xgboost_trainingjob
    resource = k8s.get_resource(reference)

    job_name = resource["spec"].get("trainingJobName", None)
    assert job_name is not None

    actual_arn = k8s.get_resource_arn(resource)
    expected_arn = self._get_sagemaker_trainingjob_arn(
        sagemaker_client, job_name
    )
    assert actual_arn == expected_arn
def single_container_model(name_suffix):
    """Create a model resource named ``name_suffix + "-model"``.

    The resource is created from the ``xgboost_model`` spec file. Yields
    ``(model_reference, model_resource)`` and deletes the model on
    teardown.
    """
    model_name = name_suffix + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["MODEL_NAME"] = model_name

    model_reference, _model_spec, model_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_RESOURCE_PLURAL,
        resource_name=model_name,
        spec_file="xgboost_model",
        replacements=replacements,
    )

    assert model_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(model_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {model_resource['status']}"
        )
    assert k8s.get_resource_arn(model_resource) is not None

    yield (model_reference, model_resource)

    # Teardown: the model is always deleted here (no existence check).
    _, deleted = k8s.delete_custom_resource(model_reference, 3, 10)
    assert deleted
def test_unversioned_model_package_completed(
        self, xgboost_unversioned_model_package):
    """Verify that an unversioned model package completes and can be deleted.

    Checks the ARN against SageMaker, follows the package from InProgress
    (not synced) to Completed (synced), compares tags, deletes the
    resource, and confirms SageMaker no longer returns the package.
    """
    reference, resource = xgboost_unversioned_model_package
    assert k8s.get_resource_exists(reference)

    model_package_name = resource["spec"].get("modelPackageName", None)
    assert model_package_name is not None

    description = get_sagemaker_model_package(model_package_name)
    model_package_arn = description["ModelPackageArn"]

    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) == model_package_arn

    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_INPROGRESS
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(model_package_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, DELETE_WAIT_PERIOD, DELETE_WAIT_LENGTH
    )
    assert deleted is True
    assert get_sagemaker_model_package(model_package_name) is None
def single_variant_config():
    """Create a model plus a single-variant endpoint config that uses it.

    Both names derive from one random-suffixed config name. Yields
    ``(config_reference, config_resource)``. On teardown the model is
    deleted without checking the outcome, and the config is deleted only
    if it still exists.
    """
    config_name = random_suffix_name("single-variant-config", 32)
    model_name = config_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["ENDPOINT_CONFIG_NAME"] = config_name
    replacements["MODEL_NAME"] = model_name

    model_reference, _model_spec, model_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_RESOURCE_PLURAL,
        resource_name=model_name,
        spec_file="xgboost_model",
        replacements=replacements,
    )
    assert model_resource is not None
    # Log the status first so a missing ARN is easier to diagnose.
    if k8s.get_resource_arn(model_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {model_resource['status']}"
        )
    assert k8s.get_resource_arn(model_resource) is not None

    config_reference, _config_spec, config_resource = create_sagemaker_resource(
        resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
        resource_name=config_name,
        spec_file="endpoint_config_single_variant",
        replacements=replacements,
    )
    assert config_resource is not None

    yield (config_reference, config_resource)

    # The result of the model deletion is intentionally not inspected here.
    k8s.delete_custom_resource(model_reference, 3, 10)

    # Delete the k8s resource if not already deleted by tests
    if not k8s.get_resource_exists(config_reference):
        return
    _, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
    assert deleted
def test_create_cross_region_model(self, cross_region_model):
    """Verify a model created through a cross-region SageMaker client.

    The ARN on the resource must match the one SageMaker reports via the
    client built for ``get_cross_region()``; after deletion the model must
    no longer be returned.
    """
    reference, resource = cross_region_model
    assert k8s.get_resource_exists(reference)

    sm_client = sagemaker_client(get_cross_region())
    model_name = resource["spec"].get("modelName", None)
    description = get_sagemaker_model(model_name, sm_client)
    cross_region_model_arn = description["ModelArn"]
    assert k8s.get_resource_arn(resource) == cross_region_model_arn

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference, 3, 10)
    assert deleted

    assert get_sagemaker_model(model_name, sm_client) is None
def test_create_endpoint_config(self, single_variant_config):
    """Verify creation and deletion of a single-variant endpoint config.

    Checks the ARN against SageMaker, compares tags after a short delay,
    deletes the resource, and confirms SageMaker no longer returns the
    config.
    """
    reference, resource = single_variant_config
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    config_name = spec.get("endpointConfigName", None)
    description = get_sagemaker_endpoint_config(config_name)
    endpoint_arn = description["EndpointConfigArn"]
    assert k8s.get_resource_arn(resource) == endpoint_arn

    # random sleep before we check for tags to reduce test flakiness
    time.sleep(cfg.TAG_DELAY_SLEEP)
    assert_tags_in_sync(endpoint_arn, spec.get("tags", None))

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference, 3, 10)
    assert deleted

    assert get_sagemaker_endpoint_config(config_name) is None
def test_completed(self, xgboost_training_job_debugger):
    """Verify a training job with debugger and profiler rules completes.

    After the job reaches Completed, the resource is still expected to
    report ``ACK.ResourceSynced`` as "False" until both the ``DebugRule``
    and ``ProfilerRule`` evaluations reach the completed rule status.
    Tags are then compared and the resource is deleted.
    """
    reference, resource = xgboost_training_job_debugger
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("trainingJobName", None)
    assert job_name is not None

    description = get_sagemaker_training_job(job_name)
    training_job_arn = description["TrainingJobArn"]

    resource_arn = k8s.get_resource_arn(resource)
    # Log the status first so a missing ARN is easier to diagnose.
    if resource_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert resource_arn == training_job_arn

    assert description["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(job_name, reference, cfg.JOB_STATUS_COMPLETED)
    # Still not synced at this point: rule evaluations are checked next.
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    # Assert debugger rule evaluation completed
    self._assert_training_rule_eval_status_in_sync(
        job_name, "DebugRule", reference, cfg.RULE_STATUS_COMPLETED
    )

    # Assert profiler rule evaluation completed
    self._assert_training_rule_eval_status_in_sync(
        job_name, "ProfilerRule", reference, cfg.RULE_STATUS_COMPLETED
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(training_job_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted is True
def test_create_model(self, xgboost_model):
    """Verify creation and deletion of a model resource.

    Checks the ARN against SageMaker, compares tags after a short delay,
    deletes the resource, and confirms SageMaker no longer returns the
    model.
    """
    reference, resource = xgboost_model
    assert k8s.get_resource_exists(reference)

    spec = resource["spec"]
    model_name = spec.get("modelName", None)
    description = get_sagemaker_model(model_name)
    model_arn = description["ModelArn"]
    assert k8s.get_resource_arn(resource) == model_arn

    # random sleep before we check for tags to reduce test flakiness
    time.sleep(cfg.TAG_DELAY_SLEEP)
    assert_tags_in_sync(model_arn, spec.get("tags", None))

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference, 3, 10)
    assert deleted

    assert get_sagemaker_model(model_name) is None
def create_notebook_test(self, notebook_instance):
    """Check that a notebook instance goes from Pending to InService.

    The resource must exist and have an ARN. SageMaker must report
    ``Pending`` while the resource is not yet synced, and the resource must
    be synced once the status reaches ``InService``.
    """
    reference, resource, _ = notebook_instance
    assert k8s.get_resource_exists(reference)
    assert k8s.get_resource_arn(resource) is not None

    # Create the resource and verify that it is Pending
    instance_name = resource["spec"].get("notebookInstanceName", None)
    assert instance_name is not None

    description = get_notebook_instance(instance_name)
    assert description["NotebookInstanceStatus"] == "Pending"

    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
    self._assert_notebook_status_in_sync(instance_name, reference, "Pending")

    # wait for the resource to go to the InService state and make sure the
    # operator is synced with sagemaker.
    self._assert_notebook_status_in_sync(instance_name, reference, "InService")
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
def test_create_hpo(self, xgboost_hpojob):
    """Verify that an HPO job is created and stops when deleted.

    The SageMaker status must be in ``HPO_JOB_STATUS_CREATED`` after
    creation and in ``HPO_JOB_STATUS_STOPPED`` after the k8s resource is
    deleted.
    """
    reference, resource = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    hpo_job_name = resource["spec"].get("hyperParameterTuningJobName", None)
    assert hpo_job_name is not None

    description = get_sagemaker_hpo_job(hpo_job_name)
    assert k8s.get_resource_arn(resource) == description["HyperParameterTuningJobArn"]
    assert description["HyperParameterTuningJobStatus"] in HPO_JOB_STATUS_CREATED

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference)
    assert deleted is True

    description = get_sagemaker_hpo_job(hpo_job_name)
    assert description["HyperParameterTuningJobStatus"] in HPO_JOB_STATUS_STOPPED
def test_create_update_delete(self, notebook_instance_lifecycleConfig):
    """Exercise create, update and delete of a notebook lifecycle config.

    After checking the ARN, the ``onStart`` content is patched, the test
    waits for the update via ``wait_until_update``, verifies the new
    content in SageMaker, and deletes the resource.
    """
    reference, resource, spec = notebook_instance_lifecycleConfig
    assert k8s.get_resource_exists(reference)

    # Getting the resource name
    lfc_name = resource["spec"].get("notebookInstanceLifecycleConfigName", None)
    assert lfc_name is not None

    description = get_notebook_instance_lifecycle_config(lfc_name)
    assert (
        k8s.get_resource_arn(resource)
        == description["NotebookInstanceLifecycleConfigArn"]
    )

    # We need to keep track of the current time so it's best to just do
    # the update test with the create test.
    status = resource["status"]
    assert "lastModifiedTime" in status
    last_modified_time = status["lastModifiedTime"]

    # cGlwIGluc3RhbGwgc2l4 = pip install six
    update_content = "cGlwIGluc3RhbGwgc2l4"
    spec["spec"]["onStart"] = [{"content": update_content}]
    k8s.patch_custom_resource(reference, spec)

    assert self.wait_until_update(reference, last_modified_time) == True

    # Verifying that an update was successful
    description = get_notebook_instance_lifecycle_config(lfc_name)
    assert description["OnStart"][0]["Content"] == update_content

    # Deleting the resource
    _, deleted = k8s.delete_custom_resource(
        reference, DELETE_WAIT_PERIOD, DELETE_PERIOD_LENGTH
    )
    assert deleted is True
    assert get_notebook_instance_lifecycle_config(lfc_name) is None
def create_endpoint_test(self, xgboost_endpoint):
    """Check that an endpoint goes from Creating to InService.

    Verifies the ARN against SageMaker, follows the status from creating
    (not synced) to in-service (synced), and compares tags.
    """
    reference, resource, _ = xgboost_endpoint
    assert k8s.get_resource_exists(reference)

    # endpoint has correct arn and status
    spec = resource["spec"]
    endpoint_name = spec.get("endpointName", None)
    assert endpoint_name is not None

    description = get_sagemaker_endpoint(endpoint_name)
    endpoint_arn = description["EndpointArn"]
    assert k8s.get_resource_arn(resource) == endpoint_arn

    # endpoint transitions Creating -> InService state
    assert_endpoint_status_in_sync(
        endpoint_name, reference, cfg.ENDPOINT_STATUS_CREATING
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_endpoint_status_in_sync(
        endpoint_name, reference, cfg.ENDPOINT_STATUS_INSERVICE
    )
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(endpoint_arn, spec.get("tags", None))