def assert_domain_status_in_sync(domain_id, reference, expected_status):
    """Assert that SageMaker and the Kubernetes resource agree on the domain status.

    Waits (via ``wait_for_status``) first on the SageMaker side, then on the
    Kubernetes custom resource, and finally asserts that both results equal
    ``expected_status``.

    Args:
        domain_id: Identifier passed to ``get_domain_sagemaker_status``.
        reference: Resource reference passed to ``get_k8s_resource_status``.
        expected_status: Status string both sides are expected to report.

    Raises:
        AssertionError: If either observed status differs from
            ``expected_status``.
    """
    # Both waits share the same target status and polling arguments (10, 30).
    polling = (expected_status, 10, 30)

    sagemaker_side = wait_for_status(*polling, get_domain_sagemaker_status, domain_id)
    kubernetes_side = wait_for_status(*polling, get_k8s_resource_status, reference)

    assert sagemaker_side == kubernetes_side == expected_status
def assert_app_status_in_sync(
    domain_id, user_profile_name, app_type, app_name, reference, expected_status
):
    """Assert that SageMaker and the Kubernetes resource agree on the app status.

    Waits (via ``wait_for_status``) first on the SageMaker side, then on the
    Kubernetes custom resource, and finally asserts that both results equal
    ``expected_status``.

    Args:
        domain_id: Forwarded to ``get_app_sagemaker_status``.
        user_profile_name: Forwarded to ``get_app_sagemaker_status``.
        app_type: Forwarded to ``get_app_sagemaker_status``.
        app_name: Forwarded to ``get_app_sagemaker_status``.
        reference: Resource reference passed to ``get_k8s_resource_status``.
        expected_status: Status string both sides are expected to report.

    Raises:
        AssertionError: If either observed status differs from
            ``expected_status``.
    """
    # Both waits share the same target status and polling arguments (10, 30).
    polling = (expected_status, 10, 30)
    app_identity = (domain_id, user_profile_name, app_type, app_name)

    sagemaker_side = wait_for_status(*polling, get_app_sagemaker_status, *app_identity)
    kubernetes_side = wait_for_status(*polling, get_k8s_resource_status, reference)

    assert sagemaker_side == kubernetes_side == expected_status
def user_profile_fixture(domain_fixture):
    """Create a user profile on top of an existing domain and clean it up afterwards.

    Generator-style fixture (presumably registered with pytest by a decorator
    outside this block -- confirm). Steps:

    1. Unpack ``domain_fixture`` and check the domain resource exists and is
       ``InService`` on both SageMaker and Kubernetes.
    2. Patch the domain's kernel instance type to ``ml.t3.large`` and wait
       for the change to be observed and the domain to be ``InService`` again.
    3. Apply the user profile YAML under a random ``profile`` name and check
       that the resulting resource has an ARN.
    4. Yield the domain and user profile triples.
    5. Delete the user profile custom resource if it still exists.

    Args:
        domain_fixture: Tuple ``(domain_reference, domain_resource,
            domain_spec)``.

    Yields:
        Tuple ``(domain_reference, domain_resource, domain_spec,
        user_profile_reference, user_profile_resource, user_profile_spec)``.

    Raises:
        AssertionError: If any precondition check or the final deletion fails.
    """
    (domain_reference, domain_resource, domain_spec) = domain_fixture
    assert k8s.get_resource_exists(domain_reference)

    domain_id = domain_resource["status"].get("domainID", None)
    assert domain_id is not None
    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    domain_resource = patch_domain_kernel_instance(
        domain_reference, domain_spec, "ml.t3.large"
    )
    wait_for_status("ml.t3.large", 10, 30, get_domain_kernel_instance, domain_id)
    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    resource_name = random_suffix_name("profile", 15)
    (
        user_profile_reference,
        user_profile_resource,
        user_profile_spec,
    ) = apply_user_profile_yaml(resource_name, domain_id)

    # From here on the user profile may exist in the cluster. The try/finally
    # guarantees it is deleted even when one of the checks below fails before
    # the yield; without it such a failure would skip teardown and leak the
    # resource.
    try:
        assert user_profile_resource is not None
        user_profile_arn = k8s.get_resource_arn(user_profile_resource)
        if user_profile_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {user_profile_resource['status']}"
            )
        assert user_profile_arn is not None

        yield (
            domain_reference,
            domain_resource,
            domain_spec,
            user_profile_reference,
            user_profile_resource,
            user_profile_spec,
        )
    finally:
        # The test itself may already have deleted the resource.
        if k8s.get_resource_exists(user_profile_reference):
            _, deleted = k8s.delete_custom_resource(
                user_profile_reference,
                cfg.JOB_DELETE_WAIT_PERIODS,
                cfg.JOB_DELETE_WAIT_LENGTH,
            )
            assert deleted
def wait_resource_monitoring_schedule_status(
    reference: k8s.CustomResourceReference,
    expected_status: str,
    wait_periods: int = 6,
    period_length: int = 30,
):
    """Wait for the monitoring schedule custom resource to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_monitoring_schedule_resource_status`` as the status getter.

    Args:
        reference: Custom resource reference handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_monitoring_schedule_resource_status,
        reference,
    )
    return observed
def _wait_sagemaker_transform_status(
    self,
    transform_job_name,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for a transform job to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_transform_sagemaker_status`` as the status getter.

    Args:
        transform_job_name: Job name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (transform_job_name,)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_transform_sagemaker_status,
        *getter_args,
    )
def _wait_resource_hpo_status(
    self,
    reference: k8s.CustomResourceReference,
    expected_status: str,
    wait_periods: int = 45,
    period_length: int = 30,
):
    """Wait for the HPO custom resource to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_hpo_resource_status`` as the status getter.

    Args:
        reference: Custom resource reference handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_hpo_resource_status,
        reference,
    )
    return observed
def _wait_sagemaker_hpo_status(
    self,
    hpo_job_name,
    expected_status: str,
    wait_periods: int = 45,
    period_length: int = 30,
):
    """Wait for an HPO job to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_hpo_sagemaker_status`` as the status getter.

    Args:
        hpo_job_name: Job name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (hpo_job_name,)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_hpo_sagemaker_status,
        *getter_args,
    )
def _wait_resource_model_package_group_status(
    self,
    reference: k8s.CustomResourceReference,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for the model package group custom resource to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_model_package_group_resource_status`` as the status getter.

    Args:
        reference: Custom resource reference handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_model_package_group_resource_status,
        reference,
    )
    return observed
def _wait_feature_group_status(
    self,
    feature_group_name,
    expected_status: str,
    wait_periods: int = WAIT_PERIOD_COUNT,
    period_length: int = WAIT_PERIOD_LENGTH,
):
    """Wait for a feature group to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_feature_group_status`` as the status getter.

    Args:
        feature_group_name: Name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status``; defaults to the
            module constant ``WAIT_PERIOD_COUNT``.
        period_length: Forwarded to ``wait_for_status``; defaults to the
            module constant ``WAIT_PERIOD_LENGTH``.

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (feature_group_name,)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_feature_group_status,
        *getter_args,
    )
def _wait_sagemaker_processing_status(
    self,
    processing_job_name,
    expected_status: str,
    wait_periods: int = 60,
    period_length: int = 30,
):
    """Wait for a processing job to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_processing_sagemaker_status`` as the status getter.

    Args:
        processing_job_name: Job name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_processing_sagemaker_status,
        processing_job_name,
    )
    return observed
def _wait_resource_feature_group_status(
    self,
    reference: k8s.CustomResourceReference,
    expected_status: str,
    wait_periods: int = WAIT_PERIOD_COUNT,
    period_length: int = WAIT_PERIOD_LENGTH,
):
    """Wait for the feature group custom resource to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_resource_feature_group_status`` as the status getter.

    Args:
        reference: Custom resource reference handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status``; defaults to the
            module constant ``WAIT_PERIOD_COUNT``.
        period_length: Forwarded to ``wait_for_status``; defaults to the
            module constant ``WAIT_PERIOD_LENGTH``.

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (reference,)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_resource_feature_group_status,
        *getter_args,
    )
def _wait_sagemaker_model_package_status(
    self,
    model_package_name,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for a model package to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_model_package_sagemaker_status`` as the status getter.

    Args:
        model_package_name: Name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_model_package_sagemaker_status,
        model_package_name,
    )
    return observed
def _wait_sagemaker_notebook_status(
    self,
    notebook_instance_name: str,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for a notebook instance to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_notebook_instance_sagemaker_status`` as the status getter.

    Args:
        notebook_instance_name: Instance name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (notebook_instance_name,)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_notebook_instance_sagemaker_status,
        *getter_args,
    )
def wait_sagemaker_monitoring_schedule_status(
    sagemaker_client,
    monitoring_schedule_name,
    expected_status: str,
    wait_periods: int = 6,
    period_length: int = 30,
):
    """Wait for a monitoring schedule to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_monitoring_schedule_sagemaker_status`` as the status getter, called
    with the client and the schedule name.

    Args:
        sagemaker_client: Client object handed to the status getter.
        monitoring_schedule_name: Schedule name handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (sagemaker_client, monitoring_schedule_name)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_monitoring_schedule_sagemaker_status,
        *getter_args,
    )
def _wait_resource_training_rule_eval_status(
    self,
    reference: k8s.CustomResourceReference,
    rule_type: str,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for a training rule evaluation on the custom resource to report ``expected_status``.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_training_rule_eval_resource_status`` as the status getter, called
    with the resource reference and the rule type.

    Args:
        reference: Custom resource reference handed to the status getter.
        rule_type: Rule type handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    observed = wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_training_rule_eval_resource_status,
        reference,
        rule_type,
    )
    return observed
def _wait_sagemaker_training_rule_eval_status(
    self,
    training_job_name,
    rule_type: str,
    expected_status: str,
    wait_periods: int = 30,
    period_length: int = 30,
):
    """Wait for a training rule evaluation to report ``expected_status`` in SageMaker.

    Thin wrapper around ``wait_for_status`` that uses
    ``get_training_rule_eval_sagemaker_status`` as the status getter, called
    with the training job name and the rule type.

    Args:
        training_job_name: Job name handed to the status getter.
        rule_type: Rule type handed to the status getter.
        expected_status: Status to wait for.
        wait_periods: Forwarded to ``wait_for_status`` (presumably the
            maximum number of polling periods -- confirm against that helper).
        period_length: Forwarded to ``wait_for_status`` (presumably the
            length of each period -- confirm against that helper).

    Returns:
        Whatever ``wait_for_status`` returns.
    """
    getter_args = (training_job_name, rule_type)
    return wait_for_status(
        expected_status,
        wait_periods,
        period_length,
        get_training_rule_eval_sagemaker_status,
        *getter_args,
    )
def app_fixture(user_profile_fixture):
    """Create an app on top of an existing user profile and clean it up afterwards.

    Generator-style fixture (presumably registered with pytest by a decorator
    outside this block -- confirm). Steps:

    1. Unpack ``user_profile_fixture`` and check that the domain and user
       profile resources exist and the user profile is ``InService`` on both
       SageMaker and Kubernetes.
    2. Patch the user profile's kernel instance type to ``ml.t3.large`` and
       wait for the change to be observed and the profile to be ``InService``
       again.
    3. Apply the app YAML and check that the resulting resource has an ARN.
    4. Yield the domain, user profile and app triples.
    5. Delete the app custom resource if it still exists.

    Args:
        user_profile_fixture: Tuple ``(domain_reference, domain_resource,
            domain_spec, user_profile_reference, user_profile_resource,
            user_profile_spec)``.

    Yields:
        The six input items followed by ``app_reference``, ``app_resource``
        and ``app_spec``.

    Raises:
        AssertionError: If any precondition check or the final deletion fails.
    """
    (
        domain_reference,
        domain_resource,
        domain_spec,
        user_profile_reference,
        user_profile_resource,
        user_profile_spec,
    ) = user_profile_fixture
    assert k8s.get_resource_exists(domain_reference)
    assert k8s.get_resource_exists(user_profile_reference)

    domain_id = domain_resource["status"].get("domainID", None)
    user_profile_name = user_profile_resource["spec"]["userProfileName"]
    assert_user_profile_status_in_sync(
        domain_id, user_profile_name, user_profile_reference, "InService"
    )

    user_profile_resource = patch_user_profile_kernel_instance(
        user_profile_reference, user_profile_spec, "ml.t3.large"
    )
    wait_for_status(
        "ml.t3.large",
        10,
        30,
        get_user_profile_kernel_instance,
        domain_id,
        user_profile_name,
    )
    assert_user_profile_status_in_sync(
        domain_id, user_profile_name, user_profile_reference, "InService"
    )

    (app_reference, app_resource, app_spec) = apply_app_yaml(
        domain_id, user_profile_name
    )

    # From here on the app may exist in the cluster. The try/finally
    # guarantees it is deleted even when one of the checks below fails before
    # the yield; without it such a failure would skip teardown and leak the
    # resource.
    try:
        assert app_resource is not None
        app_arn = k8s.get_resource_arn(app_resource)
        if app_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {app_resource['status']}"
            )
        assert app_arn is not None

        yield (
            domain_reference,
            domain_resource,
            domain_spec,
            user_profile_reference,
            user_profile_resource,
            user_profile_spec,
            app_reference,
            app_resource,
            app_spec,
        )
    finally:
        # The test itself may already have deleted the resource.
        if k8s.get_resource_exists(app_reference):
            _, deleted = k8s.delete_custom_resource(
                app_reference,
                cfg.JOB_DELETE_WAIT_PERIODS,
                cfg.JOB_DELETE_WAIT_LENGTH,
            )
            assert deleted