def test_pipeline_override_experiment_config():
    pipeline = Pipeline(
        name="MyPipeline",
        pipeline_experiment_config=PipelineExperimentConfig("MyExperiment", "MyTrial"),
        steps=[CustomStep(name="MyStep", input_data="input")],
        sagemaker_session=sagemaker_session_mock,
    )
    assert ordered(json.loads(pipeline.definition())) == ordered(
        {
            "Version": "2020-12-01",
            "Metadata": {},
            "Parameters": [],
            "PipelineExperimentConfig": {
                "ExperimentName": "MyExperiment",
                "TrialName": "MyTrial",
            },
            "Steps": [
                {
                    "Name": "MyStep",
                    "Type": "Training",
                    "Arguments": {"input_data": "input"},
                }
            ],
        }
    )
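# ---------------------------------------------------------------------------
# The definition-only tests in this collection reference a few helpers that are
# not shown here (sagemaker_session_mock, CustomStep, ordered). The sketch below
# is an illustrative assumption of what they look like, matching how the tests
# use them; it is not the original test scaffolding.
# ---------------------------------------------------------------------------
from unittest.mock import Mock

import pytest
from sagemaker.workflow.steps import Step, StepTypeEnum

# A bare Mock stands in for sagemaker.session.Session in the definition-only tests.
sagemaker_session_mock = Mock()


@pytest.fixture(name="sagemaker_session_mock")
def sagemaker_session_mock_fixture():
    # Hand the same mock to tests that request it as a pytest fixture.
    return sagemaker_session_mock


class CustomStep(Step):
    """Minimal Step subclass used only to exercise pipeline serialization."""

    def __init__(self, name, input_data, display_name=None, description=None):
        # Step is an attrs-based class; positional args are (name, display_name,
        # description, step_type, depends_on) in the SDK versions these tests target.
        super().__init__(name, display_name, description, StepTypeEnum.TRAINING, None)
        self.input_data = input_data

    @property
    def arguments(self):
        return {"input_data": self.input_data}

    @property
    def properties(self):
        # The real helper returns a Properties object; None suffices for
        # the definition-only assertions sketched here.
        return None


def ordered(obj):
    """Recursively sort dicts/lists so nested JSON structures compare order-insensitively.

    Assumes list elements are mutually comparable after conversion, which holds
    for the pipeline definitions asserted in these tests.
    """
    if isinstance(obj, dict):
        return sorted((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return sorted(ordered(x) for x in obj)
    return obj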
def test_processing_step_with_framework_processor(
    framework_processor, pipeline_session, processing_input, network_config
):
    processor, run_inputs = framework_processor
    processor.sagemaker_session = pipeline_session
    processor.role = sagemaker.get_execution_role()

    processor.volume_kms_key = "volume-kms-key"
    processor.network_config = network_config

    run_inputs["inputs"] = processing_input

    step_args = processor.run(**run_inputs)
    step = ProcessingStep(
        name="MyProcessingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Type": "Processing",
        "Arguments": step_args,
    }
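# Several tests above and below build step_args by calling .fit()/.run()/.transform()
# against a PipelineSession. A minimal sketch of that fixture and of the module-level
# constants those tests reference; the values here are illustrative assumptions, not
# the original conftest:
from unittest.mock import Mock

import pytest
from sagemaker.workflow.pipeline_context import PipelineSession

IMAGE_URI = "fakeimage"
INSTANCE_TYPE = "ml.m4.xlarge"
DUMMY_S3_SOURCE_DIR = "s3://dummy-bucket/dummy/source/dir/"


@pytest.fixture
def pipeline_session():
    # A PipelineSession captures job arguments instead of launching jobs, which is
    # what lets ProcessingStep/TrainingStep/etc. consume `step_args` directly.
    client_mock = Mock()
    client_mock._client_config.user_agent = (
        "Boto3/1.14.24 Python/3.8.5 Linux/5.4.0 Botocore/1.17.24 Resource"
    )
    return PipelineSession(
        boto_session=Mock(region_name="us-west-2"),
        sagemaker_client=client_mock,
        default_bucket="my-bucket",
    )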
def test_pipeline_execution_basics(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.return_value = {
        "PipelineExecutionArn": "my:arn"
    }
    sagemaker_session_mock.sagemaker_client.list_pipeline_execution_steps.return_value = {
        "PipelineExecutionSteps": [Mock()]
    }
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[ParameterString("alpha", "beta"), ParameterString("gamma", "delta")],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    execution = pipeline.start()
    execution.stop()
    assert sagemaker_session_mock.sagemaker_client.stop_pipeline_execution.called_with(
        PipelineExecutionArn="my:arn"
    )
    execution.describe()
    assert sagemaker_session_mock.sagemaker_client.describe_pipeline_execution.called_with(
        PipelineExecutionArn="my:arn"
    )
    steps = execution.list_steps()
    # list_steps goes through list_pipeline_execution_steps (mocked above),
    # so that is the client call to check here.
    assert sagemaker_session_mock.sagemaker_client.list_pipeline_execution_steps.called_with(
        PipelineExecutionArn="my:arn"
    )
    assert len(steps) == 1
def test_fail_step_with_join_fn_in_error_message():
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[],
    )
    step_fail = FailStep(
        name="FailStep",
        error_message=Join(
            on=": ",
            values=["Failed due to xxx == yyy returns", step_cond.properties.Outcome],
        ),
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step_cond, step_fail],
        parameters=[param],
    )
    _expected_dsl = [
        {
            "Name": "CondStep",
            "Type": "Condition",
            "Arguments": {
                "Conditions": [
                    {
                        "Type": "Equals",
                        "LeftValue": {"Get": "Parameters.MyInt"},
                        "RightValue": 1,
                    }
                ],
                "IfSteps": [],
                "ElseSteps": [],
            },
        },
        {
            "Name": "FailStep",
            "Type": "Fail",
            "Arguments": {
                "ErrorMessage": {
                    "Std:Join": {
                        "On": ": ",
                        "Values": [
                            "Failed due to xxx == yyy returns",
                            {"Get": "Steps.CondStep.Outcome"},
                        ],
                    }
                }
            },
        },
    ]
    assert json.loads(pipeline.definition())["Steps"] == _expected_dsl
def test_training_step_with_framework_estimator(
    estimator, pipeline_session, training_input, hyperparameters
):
    estimator.source_dir = DUMMY_S3_SOURCE_DIR
    estimator.set_hyperparameters(**hyperparameters)
    estimator.volume_kms_key = "volume-kms-key"
    estimator.output_kms_key = "output-kms-key"
    estimator.dependencies = ["dep-1", "dep-2"]

    estimator.sagemaker_session = pipeline_session
    step_args = estimator.fit(inputs=training_input)

    step = TrainingStep(
        name="MyTrainingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Type": "Training",
        "Arguments": step_args,
    }
def test_pipeline_interpolates_lambda_outputs(sagemaker_session):
    parameter = ParameterString("MyStr")
    output_param1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    output_param2 = LambdaOutput(output_name="output2", output_type=LambdaOutputTypeEnum.String)
    lambda_step1 = LambdaStep(
        name="MyLambdaStep1",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": "foo"},
        outputs=[output_param1],
    )
    lambda_step2 = LambdaStep(
        name="MyLambdaStep2",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": output_param1},
        outputs=[output_param2],
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[lambda_step1, lambda_step2],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition()) == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": {"Get": "Execution.PipelineName"},
            "TrialName": {"Get": "Execution.PipelineExecutionId"},
        },
        "Steps": [
            {
                "Name": "MyLambdaStep1",
                "Type": "Lambda",
                "Arguments": {"arg1": "foo"},
                "DependsOn": ["TestStep"],
                "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
                "OutputParameters": [{"OutputName": "output1", "OutputType": "String"}],
            },
            {
                "Name": "MyLambdaStep2",
                "Type": "Lambda",
                "Arguments": {"arg1": {"Get": "Steps.MyLambdaStep1.OutputParameters['output1']"}},
                "DependsOn": ["TestStep"],
                "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
                "OutputParameters": [{"OutputName": "output2", "OutputType": "String"}],
            },
        ],
    }
def test_tuning_step_with_single_algo_tuner(pipeline_session, entry_point):
    inputs = TrainingInput(s3_data=f"s3://{pipeline_session.default_bucket()}/training-data")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=sagemaker.get_execution_role(),
        framework_version="1.5.0",
        py_version="py3",
        instance_count=1,
        instance_type="ml.m5.xlarge",
        sagemaker_session=pipeline_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    hyperparameter_ranges = {
        "batch-size": IntegerParameter(64, 128),
    }

    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=[{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}],
        max_jobs=2,
        max_parallel_jobs=2,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = tuner.fit(inputs=inputs)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TuningStep(
            name="MyTuningStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": step_args,
    }
def test_processing_step_with_processor(pipeline_session, processing_input):
    processor = Processor(
        image_uri=IMAGE_URI,
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=pipeline_session,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = processor.run(inputs=processing_input)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
    evaluation_report = PropertyFile(
        name="EvaluationReport", output_name="evaluation", path="evaluation.json"
    )

    with warnings.catch_warnings(record=True) as w:
        step = ProcessingStep(
            name="MyProcessingStep",
            step_args=step_args,
            description="ProcessingStep description",
            display_name="MyProcessingStep",
            depends_on=["TestStep", "SecondTestStep"],
            cache_config=cache_config,
            property_files=[evaluation_report],
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Description": "ProcessingStep description",
        "DisplayName": "MyProcessingStep",
        "Type": "Processing",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "Arguments": step_args,
        "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
        "PropertyFiles": [
            {
                "FilePath": "evaluation.json",
                "OutputName": "evaluation",
                "PropertyFileName": "EvaluationReport",
            }
        ],
    }
    assert step.properties.ProcessingJobName.expr == {
        "Get": "Steps.MyProcessingStep.ProcessingJobName"
    }
def test_pipeline_describe(sagemaker_session_mock):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.describe()
    assert sagemaker_session_mock.sagemaker_client.describe_pipeline.called_with(
        PipelineName="MyPipeline",
    )
def test_pipeline_start_before_creation(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.describe_pipeline.side_effect = ClientError({}, "bar")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[ParameterString("alpha", "beta"), ParameterString("gamma", "delta")],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    with pytest.raises(ValueError):
        pipeline.start()
def test_pipeline_basic():
    parameter = ParameterString("MyStr")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[CustomStep(name="MyStep", input_data=parameter)],
        sagemaker_session=sagemaker_session_mock,
    )
    assert pipeline.to_request() == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": ExecutionVariables.PIPELINE_NAME,
            "TrialName": ExecutionVariables.PIPELINE_EXECUTION_ID,
        },
        "Steps": [
            {
                "Name": "MyStep",
                "Type": "Training",
                "Arguments": {"input_data": parameter},
            }
        ],
    }
    assert ordered(json.loads(pipeline.definition())) == ordered(
        {
            "Version": "2020-12-01",
            "Metadata": {},
            "Parameters": [{"Name": "MyStr", "Type": "String"}],
            "PipelineExperimentConfig": {
                "ExperimentName": {"Get": "Execution.PipelineName"},
                "TrialName": {"Get": "Execution.PipelineExecutionId"},
            },
            "Steps": [
                {
                    "Name": "MyStep",
                    "Type": "Training",
                    "Arguments": {"input_data": {"Get": "Parameters.MyStr"}},
                }
            ],
        }
    )
def test_pipeline_create(sagemaker_session_mock, role_arn):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.create(role_arn=role_arn)
    assert sagemaker_session_mock.sagemaker_client.create_pipeline.called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
def test_pipeline_interpolates_callback_outputs():
    parameter = ParameterString("MyStr")
    outputParam1 = CallbackOutput(output_name="output1", output_type=CallbackOutputTypeEnum.String)
    outputParam2 = CallbackOutput(output_name="output2", output_type=CallbackOutputTypeEnum.String)
    cb_step1 = CallbackStep(
        name="MyCallbackStep1",
        depends_on=["TestStep"],
        sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
        inputs={"arg1": "foo"},
        outputs=[outputParam1],
    )
    cb_step2 = CallbackStep(
        name="MyCallbackStep2",
        depends_on=["TestStep"],
        sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
        inputs={"arg1": outputParam1},
        outputs=[outputParam2],
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[cb_step1, cb_step2],
        sagemaker_session=sagemaker_session_mock,
    )

    assert json.loads(pipeline.definition()) == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": {"Get": "Execution.PipelineName"},
            "TrialName": {"Get": "Execution.PipelineExecutionId"},
        },
        "Steps": [
            {
                "Name": "MyCallbackStep1",
                "Type": "Callback",
                "Arguments": {"arg1": "foo"},
                "DependsOn": ["TestStep"],
                "SqsQueueUrl": "https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
                "OutputParameters": [{"OutputName": "output1", "OutputType": "String"}],
            },
            {
                "Name": "MyCallbackStep2",
                "Type": "Callback",
                "Arguments": {"arg1": {"Get": "Steps.MyCallbackStep1.OutputParameters['output1']"}},
                "DependsOn": ["TestStep"],
                "SqsQueueUrl": "https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
                "OutputParameters": [{"OutputName": "output2", "OutputType": "String"}],
            },
        ],
    }
def test_pipeline_start(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.return_value = {
        "PipelineExecutionArn": "my:arn"
    }
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[ParameterString("alpha", "beta"), ParameterString("gamma", "delta")],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    # Pipeline.start goes through the SageMaker client, so the assertions check
    # sagemaker_client.start_pipeline_execution (the mock configured above).
    pipeline.start()
    assert sagemaker_session_mock.sagemaker_client.start_pipeline_execution.called_with(
        PipelineName="MyPipeline",
    )

    pipeline.start(execution_display_name="pipeline-execution")
    assert sagemaker_session_mock.sagemaker_client.start_pipeline_execution.called_with(
        PipelineName="MyPipeline", PipelineExecutionDisplayName="pipeline-execution"
    )

    pipeline.start(parameters=dict(alpha="epsilon"))
    assert sagemaker_session_mock.sagemaker_client.start_pipeline_execution.called_with(
        PipelineName="MyPipeline", PipelineParameters=[{"Name": "alpha", "Value": "epsilon"}]
    )
def test_transform_step_with_transformer(pipeline_session):
    model_name = ParameterString("ModelName")
    transformer = Transformer(
        model_name=model_name,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        output_path=f"s3://{pipeline_session.default_bucket()}/Transform",
        sagemaker_session=pipeline_session,
    )
    transform_inputs = TransformInput(
        data=f"s3://{pipeline_session.default_bucket()}/batch-data",
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = transformer.transform(
            data=transform_inputs.data,
            data_type=transform_inputs.data_type,
            content_type=transform_inputs.content_type,
            compression_type=transform_inputs.compression_type,
            split_type=transform_inputs.split_type,
            input_filter=transform_inputs.input_filter,
            output_filter=transform_inputs.output_filter,
            join_source=transform_inputs.join_source,
            model_client_config=transform_inputs.model_client_config,
        )
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TransformStep(
            name="MyTransformStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        parameters=[model_name],
        sagemaker_session=pipeline_session,
    )
    step_args["ModelName"] = model_name.expr
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTransformStep",
        "Type": "Transform",
        "Arguments": step_args,
    }
def test_two_step_lambda_pipeline_with_output_reference(
    sagemaker_session, role, pipeline_name, region_name
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    outputParam1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    step_lambda1 = LambdaStep(
        name="lambda-step1",
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": "foo"},
        outputs=[outputParam1],
    )

    step_lambda2 = LambdaStep(
        name="lambda-step2",
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": outputParam1},
        outputs=[],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=[step_lambda1, step_lambda2],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
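# The integration-style tests (two-step Lambda/EMR pipelines, FailStep pipelines,
# and the parallelism-config round trip) assume live-session fixtures roughly like
# the following; the pipeline base name is an illustrative assumption:
import boto3
import pytest
import sagemaker
import sagemaker.session
from sagemaker.utils import unique_name_from_base


@pytest.fixture(scope="module")
def sagemaker_session():
    return sagemaker.session.Session(boto_session=boto3.Session())


@pytest.fixture(scope="module")
def region_name(sagemaker_session):
    return sagemaker_session.boto_session.region_name


@pytest.fixture(scope="module")
def role(sagemaker_session):
    return sagemaker.get_execution_role(sagemaker_session)


@pytest.fixture
def pipeline_name():
    # A fresh name per test keeps the create/delete cycles from colliding.
    return unique_name_from_base("my-pipeline")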
def verify(step_args):
    step = ProcessingStep(
        name="MyProcessingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Type": "Processing",
        "Arguments": step_args,
    }
def test_pipeline_create_with_parallelism_config(sagemaker_session_mock, role_arn):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    # The parallelism configuration is passed to create(), not to the pipeline's
    # experiment config, so it reaches create_pipeline as ParallelismConfiguration.
    pipeline.create(
        role_arn=role_arn,
        parallelism_config=ParallelismConfiguration(max_parallel_execution_steps=10),
    )
    assert sagemaker_session_mock.sagemaker_client.create_pipeline.called_with(
        PipelineName="MyPipeline",
        PipelineDefinition=pipeline.definition(),
        RoleArn=role_arn,
        ParallelismConfiguration={"MaxParallelExecutionSteps": 10},
    )
def test_two_steps_emr_pipeline(sagemaker_session, role, pipeline_name, region_name):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    emr_step_config = EMRStepConfig(
        jar="s3://us-west-2.elasticmapreduce/libs/script-runner/script-runner.jar",
        args=["dummy_emr_script_path"],
    )

    step_emr_1 = EMRStep(
        name="emr-step-1",
        cluster_id="j-1YONHTCP3YZKC",
        display_name="emr_step_1",
        description="MyEMRStepDescription",
        step_config=emr_step_config,
    )

    step_emr_2 = EMRStep(
        name="emr-step-2",
        cluster_id=step_emr_1.properties.ClusterId,
        display_name="emr_step_2",
        description="MyEMRStepDescription",
        step_config=emr_step_config,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=[step_emr_1, step_emr_2],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_create_and_update_with_parallelism_config(
    sagemaker_session, role, pipeline_name, region_name
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    outputParam = CallbackOutput(output_name="output", output_type=CallbackOutputTypeEnum.String)

    callback_steps = [
        CallbackStep(
            name=f"callback-step{count}",
            sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
            inputs={"arg1": "foo"},
            outputs=[outputParam],
        )
        for count in range(500)
    ]
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=callback_steps,
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role, parallelism_config={"MaxParallelExecutionSteps": 50})
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
        response = pipeline.describe()
        assert response["ParallelismConfiguration"]["MaxParallelExecutionSteps"] == 50

        pipeline.parameters = [ParameterInteger(name="InstanceCount", default_value=1)]
        response = pipeline.update(role, parallelism_config={"MaxParallelExecutionSteps": 55})
        update_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            update_arn,
        )

        response = pipeline.describe()
        assert response["ParallelismConfiguration"]["MaxParallelExecutionSteps"] == 55
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_training_step_with_estimator(pipeline_session, training_input, hyperparameters):
    estimator = Estimator(
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=pipeline_session,
        image_uri=IMAGE_URI,
        hyperparameters=hyperparameters,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = estimator.fit(inputs=training_input)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TrainingStep(
            name="MyTrainingStep",
            step_args=step_args,
            description="TrainingStep description",
            display_name="MyTrainingStep",
            depends_on=["TestStep", "SecondTestStep"],
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Description": "TrainingStep description",
        "DisplayName": "MyTrainingStep",
        "Type": "Training",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "Arguments": step_args,
    }
    assert step.properties.TrainingJobName.expr == {
        "Get": "Steps.MyTrainingStep.TrainingJobName"
    }
def test_training_step_with_algorithm_base(algo_estimator, pipeline_session):
    estimator = algo_estimator(
        role=sagemaker.get_execution_role(),
        instance_type=INSTANCE_TYPE,
        instance_count=1,
        sagemaker_session=pipeline_session,
    )
    data = RecordSet(
        "s3://{}/{}".format(pipeline_session.default_bucket(), "dummy"),
        num_records=1000,
        feature_dim=128,
        channel="train",
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = estimator.fit(
            records=data,
            mini_batch_size=1000,
        )
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TrainingStep(
            name="MyTrainingStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Type": "Training",
        "Arguments": step_args,
    }
    assert step.properties.TrainingJobName.expr == {
        "Get": "Steps.MyTrainingStep.TrainingJobName"
    }
def get_pipeline(
    region,
    default_bucket="sagemaker-us-east-1-376337229415",
    model_package_group_name="AbaloneExample",
    pipeline_name="AbaloneExample",
    base_job_prefix="abalone",
):
    """Gets a SageMaker ML Pipeline instance working with the Abalone data.

    Args:
        region: AWS region to create and run the pipeline.
        default_bucket: the bucket to use for storing the artifacts.
        model_package_group_name: the name of the model package group.
        pipeline_name: the name of the pipeline.
        base_job_prefix: prefix used for the S3 locations of job artifacts.

    Returns:
        An instance of a pipeline.
    """
    sagemaker_session = get_session(region, default_bucket)
    role = sagemaker.session.get_execution_role(sagemaker_session)

    # Pipeline parameters. The default values below are representative; adjust as needed.
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_count = ParameterInteger(name="TrainingInstanceCount", default_value=1)
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval"
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://{}/abalone/abalone-dataset.csv".format(default_bucket),
    )

    # Processing step for feature engineering
    step_process = processing_job(
        processing_instance_type, processing_instance_count, sagemaker_session, role
    )

    # Training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/Abalonetrain"
    image_uri = get_image_uri(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        training_instance_type=training_instance_type,
    )
    step_train = training_job(
        image_uri,
        training_instance_type,
        training_instance_count,
        model_path,
        sagemaker_session,
        role,
    )

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
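# get_pipeline() above leans on a few small helpers that are not part of this snippet.
# A sketch of the two generic ones, assuming they wrap the standard SageMaker session
# and image-registry APIs; processing_job/training_job are project-specific and omitted:
import boto3
import sagemaker.session
from sagemaker import image_uris


def get_session(region, default_bucket):
    """Build a sagemaker.session.Session pinned to a region and default bucket."""
    boto_session = boto3.Session(region_name=region)
    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=boto_session.client("sagemaker"),
        sagemaker_runtime_client=boto_session.client("sagemaker-runtime"),
        default_bucket=default_bucket,
    )


def get_image_uri(framework, region, version, py_version, training_instance_type):
    """Resolve a built-in training image URI from the public image registry."""
    return image_uris.retrieve(
        framework=framework,
        region=region,
        version=version,
        py_version=py_version,
        instance_type=training_instance_type,
    )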
def test_pipeline_basic():
    parameter = ParameterString("MyStr")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[CustomStep(name="MyStep", input_data=parameter)],
        sagemaker_session=sagemaker_session_mock,
    )
    assert pipeline.to_request() == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "Steps": [
            {
                "Name": "MyStep",
                "Type": "Training",
                "Arguments": {"input_data": parameter},
            }
        ],
    }
    assert ordered(json.loads(pipeline.definition())) == ordered(
        {
            "Version": "2020-12-01",
            "Metadata": {},
            "Parameters": [{"Name": "MyStr", "Type": "String"}],
            "Steps": [
                {
                    "Name": "MyStep",
                    "Type": "Training",
                    "Arguments": {"input_data": {"Get": "Parameters.MyStr"}},
                }
            ],
        }
    )
def test_model_bias_check_step(
    sagemaker_session,
    check_job_config,
    model_package_group_name,
    data_config,
    bias_config,
    model_config,
    predictions_config,
):
    model_bias_check_config = ModelBiasCheckConfig(
        data_config=data_config,
        data_bias_config=bias_config,
        model_config=model_config,
        model_predicted_label_config=predictions_config,
        methods="all",
    )
    model_bias_check_step = ClarifyCheckStep(
        name="ModelBiasCheckStep",
        clarify_check_config=model_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        model_package_group_name=model_package_group_name,
        supplied_baseline_constraints="supplied_baseline_constraints",
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[model_package_group_name],
        steps=[model_bias_check_step],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == _expected_model_bias_dsl
    assert re.match(
        f"s3://{_DEFAULT_BUCKET}/{_MODEL_MONITOR_S3_PATH}"
        + f"/{_BIAS_MONITORING_CFG_BASE_NAME}-configuration"
        + f"/{_BIAS_MONITORING_CFG_BASE_NAME}-config.*/.*/analysis_config.json",
        model_bias_check_config.monitoring_analysis_config_uri,
    )
def test_two_step_fail_pipeline_with_str_err_msg(sagemaker_session, role, pipeline_name):
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_fail = FailStep(
        name="FailStep",
        error_message="Failed due to hitting in else branch",
    )
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[step_fail],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
        parameters=[param],
    )

    try:
        response = pipeline.create(role)
        pipeline_arn = response["PipelineArn"]
        execution = pipeline.start(parameters={})
        response = execution.describe()
        assert response["PipelineArn"] == pipeline_arn

        try:
            execution.wait(delay=30, max_attempts=60)
        except WaiterError:
            pass

        execution_steps = execution.list_steps()
        assert len(execution_steps) == 2
        for execution_step in execution_steps:
            if execution_step["StepName"] == "CondStep":
                assert execution_step["StepStatus"] == "Succeeded"
                continue
            assert execution_step["StepName"] == "FailStep"
            assert execution_step["StepStatus"] == "Failed"
            assert execution_step["FailureReason"] == "Failed due to hitting in else branch"

        metadata = execution_steps[0]["Metadata"]["Fail"]
        assert metadata["ErrorMessage"] == "Failed due to hitting in else branch"

        # Check the FailureReason field in ListPipelineExecutions
        executions = sagemaker_session.sagemaker_client.list_pipeline_executions(
            PipelineName=pipeline.name
        )["PipelineExecutionSummaries"]
        assert len(executions) == 1
        assert executions[0]["PipelineExecutionStatus"] == "Failed"
        assert (
            "Step failure: One or multiple steps failed"
            in executions[0]["PipelineExecutionFailureReason"]
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_invalid_pipeline_depended_on_fail_step(sagemaker_session, role, pipeline_name):
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_fail = FailStep(
        name="FailStep",
        error_message="Failed pipeline execution",
    )
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[],
        depends_on=["FailStep"],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[step_cond, step_fail],
        sagemaker_session=sagemaker_session,
        parameters=[param],
    )

    try:
        with pytest.raises(Exception) as error:
            pipeline.create(role)
        assert "CondStep can not depends on FailStep" in str(error.value)
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_pipeline_upsert(sagemaker_session_mock, role_arn):
    # The first create_pipeline call fails because the pipeline already exists,
    # so upsert should fall back to update_pipeline.
    sagemaker_session_mock.sagemaker_client.create_pipeline.side_effect = ClientError(
        operation_name="CreatePipeline",
        error_response={
            "Error": {
                "Code": "ValidationException",
                "Message": "Pipeline names must be unique within ...",
            }
        },
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.upsert(role_arn=role_arn)
    assert sagemaker_session_mock.sagemaker_client.create_pipeline.called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
    assert sagemaker_session_mock.sagemaker_client.update_pipeline.called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
def test_data_bias_check_step(
    sagemaker_session, check_job_config, model_package_group_name, bias_config
):
    data_bias_data_config = DataConfig(
        s3_data_input_path=_S3_INPUT_PATH,
        s3_output_path=_S3_OUTPUT_PATH,
        s3_analysis_config_output_path=_S3_ANALYSIS_CONFIG_OUTPUT_PATH,
        label="fraud",
        dataset_type="text/csv",
    )
    data_bias_check_config = DataBiasCheckConfig(
        data_config=data_bias_data_config,
        data_bias_config=bias_config,
        methods="all",
        kms_key="kms_key",
    )
    data_bias_check_step = ClarifyCheckStep(
        name="DataBiasCheckStep",
        clarify_check_config=data_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        model_package_group_name=model_package_group_name,
        supplied_baseline_constraints="supplied_baseline_constraints",
        cache_config=CacheConfig(enable_caching=True, expire_after="PT1H"),
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[model_package_group_name],
        steps=[data_bias_check_step],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == _expected_data_bias_dsl
    assert re.match(
        f"{_S3_ANALYSIS_CONFIG_OUTPUT_PATH}/{_BIAS_MONITORING_CFG_BASE_NAME}-configuration"
        + f"/{_BIAS_MONITORING_CFG_BASE_NAME}-config.*/.*/analysis_config.json",
        data_bias_check_config.monitoring_analysis_config_uri,
    )
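# The ClarifyCheckStep tests above assume check-job and bias-config fixtures along
# these lines; the instance type, facet name, thresholds, and parameter name are
# illustrative assumptions, not the original fixture values:
import pytest
import sagemaker
from sagemaker.clarify import BiasConfig
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.workflow.parameters import ParameterString


@pytest.fixture
def check_job_config(sagemaker_session):
    # Shared processing-job settings reused by every *CheckConfig in the tests.
    return CheckJobConfig(
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type="ml.c5.xlarge",
        volume_size_in_gb=30,
        sagemaker_session=sagemaker_session,
    )


@pytest.fixture
def bias_config():
    return BiasConfig(
        label_values_or_threshold=[1],
        facet_name="customer_gender_female",
        facet_values_or_threshold=[1],
    )


@pytest.fixture
def model_package_group_name():
    # Passed both as a pipeline parameter and to ClarifyCheckStep above.
    return ParameterString(name="MyModelPackageGroupName", default_value="")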
def test_lambda_step(sagemaker_session):
    param = ParameterInteger(name="MyInt")
    output_param1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    output_param2 = LambdaOutput(output_name="output2", output_type=LambdaOutputTypeEnum.Boolean)
    cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
    lambda_step = LambdaStep(
        name="MyLambdaStep",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        display_name="MyLambdaStep",
        description="MyLambdaStepDescription",
        inputs={"arg1": "foo", "arg2": 5, "arg3": param},
        outputs=[output_param1, output_param2],
        cache_config=cache_config,
    )
    lambda_step.add_depends_on(["SecondTestStep"])
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[param],
        steps=[lambda_step],
        sagemaker_session=sagemaker_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyLambdaStep",
        "Type": "Lambda",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "DisplayName": "MyLambdaStep",
        "Description": "MyLambdaStepDescription",
        "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
        "OutputParameters": [
            {"OutputName": "output1", "OutputType": "String"},
            {"OutputName": "output2", "OutputType": "Boolean"},
        ],
        "Arguments": {"arg1": "foo", "arg2": 5, "arg3": {"Get": "Parameters.MyInt"}},
        "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
    }