def test_constraints_object_creation_from_s3_uri_without_customizations(sagemaker_session):
    """Constraints built from an S3 URI after uploading the local constraints file."""
    with open(os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"), "r") as f:
        file_body = f.read()

    file_name = "constraints.json"
    desired_s3_uri = os.path.join(
        "s3://",
        sagemaker_session.default_bucket(),
        "integ-test-test-monitoring-files",
        str(uuid.uuid4()),
        file_name,
    )
    # Stage the constraints document in S3, then build the Constraints object from its URI.
    s3_uri = S3Uploader.upload_string_as_file_body(
        body=file_body, desired_s3_uri=desired_s3_uri, session=sagemaker_session
    )

    constraints = Constraints.from_s3_uri(
        constraints_file_s3_uri=s3_uri, sagemaker_session=sagemaker_session
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")
    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"


def test_constraints_object_creation_from_file_path_without_customizations():
    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json")
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")
    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"


def test_constraints_object_creation_from_string_without_customizations():
    with open(os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"), "r") as f:
        file_body = f.read()

    constraints = Constraints.from_string(constraints_file_string=file_body)

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")
    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"


def test_constraints_object_creation_from_file_path_with_customizations(
    sagemaker_session, monitoring_files_kms_key
):
    """Constraints from a local file with a KMS key; exercises set_monitoring and save."""
    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"),
        kms_key=monitoring_files_kms_key,
        sagemaker_session=sagemaker_session,
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")
    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"

    # Disabling monitoring updates the file-level monitoring config.
    constraints.set_monitoring(False)

    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Disabled"

    # Re-enabling monitoring for a named feature writes a per-feature override.
    constraints.set_monitoring(True, "message")

    assert (
        constraints.body_dict["features"][0]["string_constraints"]["monitoring_config_overrides"][
            "evaluate_constraints"
        ]
        == "Enabled"
    )

    constraints.set_monitoring(True, "second_message")

    assert (
        constraints.body_dict["features"][0]["string_constraints"]["monitoring_config_overrides"][
            "evaluate_constraints"
        ]
        == "Enabled"
    )

    # Persist the modified document and confirm the change survives a round trip through S3.
    constraints.save()

    new_constraints = Constraints.from_s3_uri(constraints.file_s3_uri)

    assert new_constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Disabled"


def test_constraints_object_creation_from_string_with_customizations(
    sagemaker_session, monitoring_files_kms_key
):
    with open(os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"), "r") as f:
        file_body = f.read()

    constraints = Constraints.from_string(
        constraints_file_string=file_body,
        kms_key=monitoring_files_kms_key,
        file_name="constraints.json",
        sagemaker_session=sagemaker_session,
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")
    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"


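# --- Hedged sketch, not part of the original tests ---
# The pipeline tests below receive pipeline-parameter fixtures by name. One plausible
# shape for those fixtures is sketched here for readability: the fixture names come from
# the test signatures, but the bodies and default values are assumptions. In the real
# suite these would live in a conftest.py alongside check_job_config,
# data_bias_check_config, and the other check-config fixtures. Kept commented out so the
# sketch does not shadow the actual fixtures.
#
# import pytest
# from sagemaker.workflow.parameters import ParameterString
#
# @pytest.fixture
# def supplied_baseline_constraints_uri_param():
#     return ParameterString(name="SuppliedBaselineConstraintsUri", default_value="")
#
# @pytest.fixture
# def supplied_baseline_statistics_uri_param():
#     return ParameterString(name="SuppliedBaselineStatisticsUri", default_value="")

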
def test_one_step_data_bias_pipeline_constraint_violation(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    data_bias_check_config,
    supplied_baseline_constraints_uri_param,
):
    """One-step pipeline whose ClarifyCheckStep should fail against a known-bad baseline."""
    data_bias_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/clarify_check_step/data_bias/bad_cases/analysis.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    data_bias_check_step = ClarifyCheckStep(
        name="DataBiasCheckStep",
        clarify_check_config=data_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[data_bias_check_step],
        parameters=[supplied_baseline_constraints_uri_param],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        # A non-empty monitoring analysis config should be available at the URI
        # exposed by the clarify check config.
        monitoring_analysis_cfg_json = S3Downloader.read_file(
            data_bias_check_config.monitoring_analysis_config_uri,
            sagemaker_session,
        )
        monitoring_analysis_cfg = json.loads(monitoring_analysis_cfg_json)

        assert monitoring_analysis_cfg is not None and len(monitoring_analysis_cfg) > 0

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineConstraintsUri": data_bias_supplied_baseline_constraints
                },
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            # The execution is expected to fail, so swallow the waiter error and
            # inspect the step status directly.
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            # Only the expected check-failure message counts; anything else is retried.
            if _CHECK_FAIL_ERROR_MSG not in failure_reason:
                logging.error(f"Pipeline execution failed with error: {failure_reason}. Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "DataBiasCheckStep"
            assert execution_steps[0]["StepStatus"] == "Failed"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass


def test_one_step_model_quality_pipeline_constraint_violation(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    supplied_baseline_statistics_uri_param,
    supplied_baseline_constraints_uri_param,
    model_quality_check_config,
    model_quality_supplied_baseline_statistics,
):
    """The model-quality QualityCheckStep should fail against a known-bad baseline."""
    model_quality_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/quality_check_step/model_quality/bad_cases/constraints.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    model_quality_check_step = QualityCheckStep(
        name="ModelQualityCheckStep",
        register_new_baseline=False,
        skip_check=False,
        quality_check_config=model_quality_check_config,
        check_job_config=check_job_config,
        supplied_baseline_statistics=supplied_baseline_statistics_uri_param,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[model_quality_check_step],
        parameters=[
            supplied_baseline_statistics_uri_param,
            supplied_baseline_constraints_uri_param,
        ],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineStatisticsUri": model_quality_supplied_baseline_statistics,
                    "SuppliedBaselineConstraintsUri": model_quality_supplied_baseline_constraints,
                }
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            # The execution is expected to fail; swallow the waiter error and check the step.
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            # Only the expected check-failure message counts; anything else is retried.
            if _CHECK_FAIL_ERROR_MSG not in failure_reason:
                logging.error(f"Pipeline execution failed with error: {failure_reason}. Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "ModelQualityCheckStep"
            assert execution_steps[0]["StepStatus"] == "Failed"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass


def test_one_step_data_quality_pipeline_happycase(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    supplied_baseline_statistics_uri_param,
    supplied_baseline_constraints_uri_param,
    data_quality_check_config,
    data_quality_supplied_baseline_statistics,
):
    """Happy path: the DataQualityCheckStep should succeed against a good baseline."""
    data_quality_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/quality_check_step/data_quality/good_cases/constraints.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    data_quality_check_step = QualityCheckStep(
        name="DataQualityCheckStep",
        skip_check=False,
        register_new_baseline=False,
        quality_check_config=data_quality_check_config,
        check_job_config=check_job_config,
        supplied_baseline_statistics=supplied_baseline_statistics_uri_param,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[data_quality_check_step],
        parameters=[
            supplied_baseline_statistics_uri_param,
            supplied_baseline_constraints_uri_param,
        ],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineStatisticsUri": data_quality_supplied_baseline_statistics,
                    "SuppliedBaselineConstraintsUri": data_quality_supplied_baseline_constraints,
                }
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            # Any failure is treated as transient here and retried.
            if failure_reason != "":
                logging.error(f"Pipeline execution failed with error: {failure_reason}. Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "DataQualityCheckStep"
            assert execution_steps[0]["StepStatus"] == "Succeeded"

            # The step metadata should reference the supplied baselines for the drift check,
            # while the freshly calculated baselines remain distinct from them.
            data_qual_metadata = execution_steps[0]["Metadata"]["QualityCheck"]
            assert not data_qual_metadata["SkipCheck"]
            assert not data_qual_metadata["RegisterNewBaseline"]
            assert not data_qual_metadata.get("ViolationReport", "")
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckConstraints"]
                == data_quality_supplied_baseline_constraints
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckStatistics"]
                == data_quality_supplied_baseline_statistics
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckConstraints"]
                != data_qual_metadata["CalculatedBaselineConstraints"]
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckStatistics"]
                != data_qual_metadata["CalculatedBaselineStatistics"]
            )
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
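

# --- Hedged sketch, not part of the original tests ---
# The retry loops above rely on a `retries` generator imported from the integ-test
# helpers. Its exact implementation is not shown in this file; the sketch below only
# captures the contract the loops assume: yield up to `max_retry_count` attempts,
# sleep between them, and raise once the attempts are exhausted. Kept commented out so
# it does not shadow the real helper.
#
# import time
#
# def retries(max_retry_count, exception_message_prefix, seconds_to_sleep=2):
#     for attempt in range(max_retry_count):
#         yield attempt
#         time.sleep(seconds_to_sleep)
#     raise Exception(
#         f"{exception_message_prefix}: exceeded the maximum of {max_retry_count} retries"
#     )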