def test_two_step_fail_pipeline_with_str_err_msg(sagemaker_session, role, pipeline_name):
    """Run a condition pipeline whose else branch is a FailStep and verify the failure details.

    ``MyInt`` defaults to 2, so the ``MyInt == 1`` condition evaluates to false
    when the pipeline is started without overrides and the else branch
    (``FailStep``) is taken. The test checks the per-step statuses, the
    FailStep's failure reason and metadata, and the execution-level summary.

    Args:
        sagemaker_session: Session used to create the pipeline and to list its executions.
        role: Role passed to ``pipeline.create``.
        pipeline_name: Name under which the pipeline is created.
    """
    expected_message = "Failed due to hitting in else branch"

    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_fail = FailStep(
        name="FailStep",
        error_message=expected_message,
    )
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[step_fail],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
        parameters=[param],
    )

    try:
        response = pipeline.create(role)
        pipeline_arn = response["PipelineArn"]
        execution = pipeline.start(parameters={})
        response = execution.describe()
        assert response["PipelineArn"] == pipeline_arn

        try:
            execution.wait(delay=30, max_attempts=60)
        except WaiterError:
            # The execution is asserted below to end as "Failed", so an error
            # from the waiter is tolerated here.
            pass
        execution_steps = execution.list_steps()

        assert len(execution_steps) == 2
        for execution_step in execution_steps:
            if execution_step["StepName"] == "CondStep":
                assert execution_step["StepStatus"] == "Succeeded"
                continue
            assert execution_step["StepName"] == "FailStep"
            assert execution_step["StepStatus"] == "Failed"
            assert execution_step["FailureReason"] == expected_message
            # Read the metadata from the FailStep entry itself rather than from
            # execution_steps[0], so the check does not depend on list order.
            metadata = execution_step["Metadata"]["Fail"]
            assert metadata["ErrorMessage"] == expected_message

        # Check FailureReason field in ListPipelineExecutions
        executions = sagemaker_session.sagemaker_client.list_pipeline_executions(
            PipelineName=pipeline.name
        )["PipelineExecutionSummaries"]

        assert len(executions) == 1
        assert executions[0]["PipelineExecutionStatus"] == "Failed"
        assert (
            "Step failure: One or multiple steps failed"
            in executions[0]["PipelineExecutionFailureReason"]
        )
    finally:
        # Best-effort cleanup: a failed delete must not mask the test outcome.
        try:
            pipeline.delete()
        except Exception:
            pass
def test_fail_step_with_join_fn_in_error_message():
    """Check the step DSL rendered for a FailStep whose error message is a ``Join``.

    The joined message references the condition step's ``Outcome`` property,
    which must be rendered as a ``Get`` expression inside ``Std:Join``.
    """
    my_int = ParameterInteger(name="MyInt", default_value=2)
    condition_step = ConditionStep(
        name="CondStep",
        conditions=[ConditionEquals(left=my_int, right=1)],
        if_steps=[],
        else_steps=[],
    )
    joined_message = Join(
        on=": ",
        values=["Failed due to xxx == yyy returns", condition_step.properties.Outcome],
    )
    fail_step = FailStep(name="FailStep", error_message=joined_message)
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[condition_step, fail_step],
        parameters=[my_int],
    )

    expected_condition_dsl = {
        "Name": "CondStep",
        "Type": "Condition",
        "Arguments": {
            "Conditions": [
                {
                    "Type": "Equals",
                    "LeftValue": {"Get": "Parameters.MyInt"},
                    "RightValue": 1,
                }
            ],
            "IfSteps": [],
            "ElseSteps": [],
        },
    }
    expected_fail_dsl = {
        "Name": "FailStep",
        "Type": "Fail",
        "Arguments": {
            "ErrorMessage": {
                "Std:Join": {
                    "On": ": ",
                    "Values": [
                        "Failed due to xxx == yyy returns",
                        {"Get": "Steps.CondStep.Outcome"},
                    ],
                }
            }
        },
    }

    rendered_steps = json.loads(pipeline.definition())["Steps"]
    assert rendered_steps == [expected_condition_dsl, expected_fail_dsl]
def test_invalid_pipeline_depended_on_fail_step(sagemaker_session, role, pipeline_name):
    """Creating a pipeline in which a step depends on a FailStep must be rejected.

    ``CondStep`` declares ``depends_on=["FailStep"]``; ``pipeline.create`` is
    expected to raise with a message naming that invalid dependency.

    Args:
        sagemaker_session: Session the pipeline is bound to.
        role: Role passed to ``pipeline.create``.
        pipeline_name: Name under which creation is attempted.
    """
    my_int = ParameterInteger(name="MyInt", default_value=2)
    fail_step = FailStep(
        name="FailStep",
        error_message="Failed pipeline execution",
    )
    dependent_step = ConditionStep(
        name="CondStep",
        conditions=[ConditionEquals(left=my_int, right=1)],
        if_steps=[],
        else_steps=[],
        depends_on=["FailStep"],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[dependent_step, fail_step],
        sagemaker_session=sagemaker_session,
        parameters=[my_int],
    )

    try:
        with pytest.raises(Exception) as exc_info:
            pipeline.create(role)

        # Checked after the context manager exits so the assertion really runs.
        raised_message = str(exc_info.value)
        assert "CondStep can not depends on FailStep" in raised_message
    finally:
        # Best-effort cleanup in case the pipeline was created after all.
        try:
            pipeline.delete()
        except Exception:
            pass
def test_condition_step():
    """Check ``ConditionStep.to_request`` and the ``Outcome`` property expression.

    Covers dependencies given at construction plus one added later through
    ``add_depends_on``, and the rendering of the if/else branch steps.
    """

    def training_request(step_name):
        # Request shape asserted for each CustomStep placed in a branch.
        return {"Name": step_name, "Type": "Training", "Arguments": {}}

    my_int = ParameterInteger(name="MyInt")
    if_branch_step = CustomStep("MyStep1")
    else_branch_step = CustomStep("MyStep2")
    condition_step = ConditionStep(
        name="MyConditionStep",
        depends_on=["TestStep"],
        conditions=[ConditionEquals(left=my_int, right=1)],
        if_steps=[if_branch_step],
        else_steps=[else_branch_step],
    )
    condition_step.add_depends_on(["SecondTestStep"])

    expected_request = {
        "Name": "MyConditionStep",
        "Type": "Condition",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "Arguments": {
            "Conditions": [
                {
                    "Type": "Equals",
                    "LeftValue": {"Get": "Parameters.MyInt"},
                    "RightValue": 1,
                },
            ],
            "IfSteps": [training_request("MyStep1")],
            "ElseSteps": [training_request("MyStep2")],
        },
    }
    assert condition_step.to_request() == expected_request

    expected_outcome_expr = {"Get": "Steps.MyConditionStep.Outcome"}
    assert condition_step.properties.Outcome.expr == expected_outcome_expr
def test_model_registration_with_model_repack(
    sagemaker_session,
    role,
    pipeline_name,
    region_name,
):
    """Create and start a condition pipeline that trains, registers or creates a model.

    The condition ``GoodEnoughInput >= 1`` selects between the train + register
    branch and the create-model branch. The pipeline is started twice, once
    with the defaults and once with ``GoodEnoughInput`` set to 0, and the
    returned ARNs are matched against the expected patterns.

    Args:
        sagemaker_session: Session used for the data upload and the pipeline.
        role: Role used by the estimator, the model and ``pipeline.create``.
        pipeline_name: Name under which the pipeline is created.
        region_name: Region expected to appear in the returned ARNs.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    training_script = os.path.join(mnist_dir, "mnist.py")
    uploaded_data_uri = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    training_input = TrainingInput(s3_data=uploaded_data_uri)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)

    estimator = PyTorch(
        entry_point=training_script,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
    )
    train_step = TrainingStep(
        name="pytorch-train",
        estimator=estimator,
        inputs=training_input,
    )

    register_step = RegisterModel(
        name="pytorch-register-model",
        estimator=estimator,
        model_data=train_step.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["*"],
        response_types=["*"],
        inference_instances=["*"],
        transform_instances=["*"],
        description="test-description",
        entry_point=training_script,
    )

    trained_model = Model(
        image_uri=estimator.training_image_uri(),
        model_data=train_step.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    create_model_step = CreateModelStep(
        name="pytorch-model",
        model=trained_model,
        inputs=CreateModelInput(
            instance_type="ml.m5.large",
            accelerator_type="ml.eia1.medium",
        ),
    )

    good_enough = ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1)
    branch_step = ConditionStep(
        name="cond-good-enough",
        conditions=[good_enough],
        if_steps=[train_step, register_step],
        else_steps=[create_model_step],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[good_enough_input, instance_count, instance_type],
        steps=[branch_step],
        sagemaker_session=sagemaker_session,
    )

    pipeline_arn_pattern = fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}"
    execution_arn_pattern = fr"{pipeline_arn_pattern}/execution/"

    try:
        create_arn = pipeline.create(role)["PipelineArn"]
        assert re.match(pipeline_arn_pattern, create_arn)

        # First start uses the parameter defaults; the second overrides GoodEnoughInput.
        for start_parameters in ({}, {"GoodEnoughInput": 0}):
            execution = pipeline.start(parameters=start_parameters)
            assert re.match(execution_arn_pattern, execution.arn)
    finally:
        # Best-effort cleanup; a failed delete must not mask the test outcome.
        try:
            pipeline.delete()
        except Exception:
            pass
def get_pipeline(
    region,
    sagemaker_project_arn=None,
    role=None,
    default_bucket=None,
    model_package_group_name="restatePackageGroup",  # Choose any name
    pipeline_name="restate-p-XXXXXXXXX",  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
    base_job_prefix="restate",  # Choose any name
):
    """Build the SageMaker pipeline that works on the real-estate ("restate") data.

    The pipeline runs a Data Wrangler flow over an Athena query, preprocesses
    the result with scikit-learn, tunes an XGBoost model and a decision-tree
    model, evaluates the top model of each tuner, and registers the
    decision-tree model when its reported metric is greater than or equal to
    the XGBoost one (the XGBoost model otherwise).

    Building the pipeline has side effects: the flow file is read from the
    working directory and uploaded to S3, and progress lines are printed.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_project_arn: Accepted but not used by this function.
        role: IAM role to create and run steps and pipeline. When None, the
            execution role of the session is looked up.
        default_bucket: the bucket to use for storing the artifacts; passed to
            ``get_session``.
        model_package_group_name: Model package group the models are registered in.
        pipeline_name: Name of the pipeline; also used as the prefix of the
            experiment name.
        base_job_prefix: Prefix for job names and for the training output path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # Parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.2xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="PendingManualApproval",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="",  # Change this to point to the s3 location of your raw input data.
    )

    # You can configure this with your own bucket name, e.g.
    # bucket = "my-bucket"
    bucket = sagemaker_session.default_bucket()

    data_sources = [
        ProcessingInput(
            input_name="restate-california",
            dataset_definition=DatasetDefinition(
                local_path="/opt/ml/processing/restate-california",
                data_distribution_type="FullyReplicated",
                # You can override below to point to other database or use different queries
                athena_dataset_definition=AthenaDatasetDefinition(
                    catalog="AwsDataCatalog",
                    database="restate",
                    query_string="SELECT * FROM restate.california_10",
                    output_s3_uri=f"s3://{bucket}/athena/",
                    output_format="PARQUET",
                ),
            ),
        )
    ]

    print(f"Data Wrangler export storage bucket: {bucket}")

    # unique flow export ID
    flow_export_id = f"{time.strftime('%d-%H-%M-%S', time.gmtime())}-{str(uuid.uuid4())[:8]}"
    flow_export_name = f"flow-{flow_export_id}"

    # Output name is auto-generated from the select node's ID + output name from the flow file.
    output_name = "99ae1ec3-dd5f-453c-bfae-721dac423cd7.default"

    s3_output_prefix = f"export-{flow_export_name}/output"
    s3_output_path = f"s3://{bucket}/{s3_output_prefix}"
    print(f"Flow S3 export result path: {s3_output_path}")

    processing_job_output = ProcessingOutput(
        output_name=output_name,
        source="/opt/ml/processing/output",
        destination=s3_output_path,
        s3_upload_mode="EndOfJob",
    )

    # name of the flow file which should exist in the current working directory
    flow_file_name = "sagemaker-pipeline/restate-athena-california.flow"

    # The parsed flow is not needed below; parsing it up front makes a missing
    # or malformed flow file fail before anything is uploaded.
    with open(flow_file_name) as f:
        json.load(f)

    # Upload flow to S3
    s3_client = boto3.client("s3")
    s3_client.upload_file(
        flow_file_name,
        bucket,
        f"data_wrangler_flows/{flow_export_name}.flow",
        ExtraArgs={"ServerSideEncryption": "aws:kms"},
    )

    flow_s3_uri = f"s3://{bucket}/data_wrangler_flows/{flow_export_name}.flow"
    print(f"Data Wrangler flow {flow_file_name} uploaded to {flow_s3_uri}")

    # Input - Flow: the uploaded copy of the flow file
    flow_input = ProcessingInput(
        source=flow_s3_uri,
        destination="/opt/ml/processing/flow",
        input_name="flow",
        s3_data_type="S3Prefix",
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
    )

    # IAM role for executing the processing job.
    iam_role = role

    # Data Wrangler Container URL.
    container_uri = sagemaker.image_uris.retrieve(
        framework="data-wrangler",  # image of the Data Wrangler processing container
        region=region,
    )

    # Processing Job Instance count and instance type.
    instance_count = 2
    instance_type = "ml.m5.4xlarge"

    # Size in GB of the EBS volume to use for storing data during processing
    volume_size_in_gb = 30

    # Content type for each output. Data Wrangler supports CSV as default and Parquet.
    output_content_type = "CSV"

    # Network Isolation mode; default is off
    enable_network_isolation = False

    # List of tags to be passed to the processing job
    user_tags = []

    # Output configuration used as processing job container arguments
    output_config = {output_name: {"content_type": output_content_type}}

    # KMS key for per object encryption; default is None
    kms_key = None

    processor = Processor(
        role=iam_role,
        image_uri=container_uri,
        instance_count=instance_count,
        instance_type=instance_type,
        volume_size_in_gb=volume_size_in_gb,
        network_config=NetworkConfig(enable_network_isolation=enable_network_isolation),
        sagemaker_session=sagemaker_session,
        output_kms_key=kms_key,
        tags=user_tags,
    )

    data_wrangler_step = ProcessingStep(
        name="DataWranglerProcess",
        processor=processor,
        inputs=[flow_input] + data_sources,
        outputs=[processing_job_output],
        job_arguments=[f"--output-config '{json.dumps(output_config)}'"],
    )

    # Processing step for feature engineering
    # this processor does not have awswrangler installed
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-restate-preprocess",  # choose any name
        sagemaker_session=sagemaker_session,
        role=role,
    )

    # The Data Wrangler output location is both mounted as an input and passed
    # to the preprocessing script as an argument.
    wrangler_output_uri = data_wrangler_step.properties.ProcessingOutputConfig.Outputs[
        output_name
    ].S3Output.S3Uri

    step_process = ProcessingStep(
        name="Preprocess",  # choose any name
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(
                source=wrangler_output_uri,
                destination="/opt/ml/processing/data/raw-data-dir",
            )
        ],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=[
            "--input-data",
            wrangler_output_uri,
        ],
    )
    process_outputs = step_process.properties.ProcessingOutputConfig.Outputs

    # Training step for generating model artifacts
    model_path = f"s3://{bucket}/{base_job_prefix}/restateTrain"
    model_bucket_key = f"{bucket}/{base_job_prefix}/restateTrain"
    cache_config = CacheConfig(enable_caching=True, expire_after="30d")

    xgb_image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=xgb_image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/restate-xgb-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    # Only these two hyperparameters are set explicitly; max_depth is tuned below.
    xgb_train.set_hyperparameters(num_round=50, grow_policy="lossguide")

    xgb_objective_metric_name = "validation:mse"
    xgb_hyperparameter_ranges = {
        "max_depth": IntegerParameter(2, 10, scaling_type="Linear"),
    }

    xgb_tuner_log = HyperparameterTuner(
        xgb_train,
        xgb_objective_metric_name,
        xgb_hyperparameter_ranges,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    xgb_step_tuning = TuningStep(
        name="XGBHPTune",
        tuner=xgb_tuner_log,
        inputs={
            "train": TrainingInput(
                s3_data=process_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=process_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    # dtree_image_uri = '625467769535.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-decision-tree:latest'
    dtree_image_uri = sagemaker_session.sagemaker_client.describe_image_version(
        ImageName="restate-dtree"
    )["ContainerImage"]

    dtree_train = Estimator(
        image_uri=dtree_image_uri,
        role=role,
        instance_count=1,
        instance_type=training_instance_type,
        base_job_name=f"{base_job_prefix}/restate-dtree-train",
        output_path=model_path,
        sagemaker_session=sagemaker_session,
    )

    dtree_objective_metric_name = "validation:mse"
    # Raw string: "\S" is not a valid escape sequence in a normal string literal.
    dtree_metric_definitions = [{"Name": "validation:mse", "Regex": r"mse:(\S+)"}]

    dtree_hyperparameter_ranges = {
        "max_depth": IntegerParameter(10, 50, scaling_type="Linear"),
        "max_leaf_nodes": IntegerParameter(2, 12, scaling_type="Linear"),
    }

    dtree_tuner_log = HyperparameterTuner(
        dtree_train,
        dtree_objective_metric_name,
        dtree_hyperparameter_ranges,
        dtree_metric_definitions,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    dtree_step_tuning = TuningStep(
        name="DTreeHPTune",
        tuner=dtree_tuner_log,
        inputs={
            "training": TrainingInput(
                s3_data=process_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=process_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    dtree_script_eval = ScriptProcessor(
        image_uri=dtree_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-dtree-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    dtree_evaluation_report = PropertyFile(
        name="EvaluationReportDTree",
        output_name="dtree_evaluation",
        path="dtree_evaluation.json",
    )

    dtree_step_eval = ProcessingStep(
        name="DTreeEval",
        processor=dtree_script_eval,
        inputs=[
            ProcessingInput(
                source=dtree_step_tuning.get_top_model_s3_uri(
                    top_k=0, s3_bucket=model_bucket_key
                ),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=process_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="dtree_evaluation", source="/opt/ml/processing/evaluation"
            ),
        ],
        code=os.path.join(BASE_DIR, "dtree_evaluate.py"),
        property_files=[dtree_evaluation_report],
    )

    xgb_script_eval = ScriptProcessor(
        image_uri=xgb_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-xgb-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    xgb_evaluation_report = PropertyFile(
        name="EvaluationReportXGBoost",
        output_name="xgb_evaluation",
        path="xgb_evaluation.json",
    )

    xgb_step_eval = ProcessingStep(
        name="XGBEval",
        processor=xgb_script_eval,
        inputs=[
            ProcessingInput(
                source=xgb_step_tuning.get_top_model_s3_uri(
                    top_k=0, s3_bucket=model_bucket_key
                ),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=process_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="xgb_evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "xgb_evaluate.py"),
        property_files=[xgb_evaluation_report],
    )

    xgb_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/xgb_evaluation.json".format(
                xgb_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    dtree_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/dtree_evaluation.json".format(
                dtree_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
                    "S3Uri"
                ]
            ),
            content_type="application/json",
        )
    )

    xgb_eval_metrics = JsonGet(
        step=xgb_step_eval,
        property_file=xgb_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )

    dtree_eval_metrics = JsonGet(
        step=dtree_step_eval,
        property_file=dtree_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )

    # Register model step that will be conditionally executed
    dtree_step_register = RegisterModel(
        name="DTreeReg",
        estimator=dtree_train,
        model_data=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=dtree_model_metrics,
    )

    # Register model step that will be conditionally executed
    xgb_step_register = RegisterModel(
        name="XGBReg",
        estimator=xgb_train,
        model_data=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=xgb_model_metrics,
    )

    # Condition step for evaluating model quality and branching execution:
    # the decision-tree model is registered when its metric is >= the XGBoost one.
    cond_gte = ConditionGreaterThanOrEqualTo(  # You can change the condition here
        left=dtree_eval_metrics,
        right=xgb_eval_metrics,
    )

    step_cond = ConditionStep(
        name="AccuracyCond",
        conditions=[cond_gte],
        if_steps=[dtree_step_register],
        else_steps=[xgb_step_register],
    )

    create_date = time.strftime("%Y-%m-%d-%H-%M-%S")

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        pipeline_experiment_config=PipelineExperimentConfig(
            f"{pipeline_name}-{create_date}", f"restate-{create_date}"
        ),
        steps=[
            data_wrangler_step,
            step_process,
            dtree_step_tuning,
            xgb_step_tuning,
            dtree_step_eval,
            xgb_step_eval,
            step_cond,
        ],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def get_pipeline(region, role, default_bucket, pipeline_name, model_package_group_name, base_job_prefix):
    """Gets a SageMaker ML Pipeline instance working with BERT.

    The pipeline preprocesses the raw reviews, trains a TensorFlow BERT model,
    evaluates it, and, when the reported accuracy is at least
    ``MinAccuracyValue``, registers the model and creates a deployable model.

    NOTE(review): ``bucket``, ``timestamp``, ``sess`` and ``BASE_DIR`` are not
    defined in this function; they are presumably module-level names. Confirm
    they exist where this function is defined.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts
        pipeline_name: name of this pipeline
        model_package_group_name: model package group
        base_job_prefix: prefix of the job name

    Returns:
        an instance of a pipeline
    """
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://{}/amazon-reviews-pds/tsv/".format(bucket),
    )

    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)

    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.c5.2xlarge"
    )

    max_seq_length = ParameterInteger(
        name="MaxSeqLength",
        default_value=64,
    )

    balance_dataset = ParameterString(
        name="BalanceDataset",
        default_value="True",
    )

    train_split_percentage = ParameterFloat(
        name="TrainSplitPercentage",
        default_value=0.90,
    )

    validation_split_percentage = ParameterFloat(
        name="ValidationSplitPercentage",
        default_value=0.05,
    )

    test_split_percentage = ParameterFloat(
        name="TestSplitPercentage",
        default_value=0.05,
    )

    feature_store_offline_prefix = ParameterString(
        name="FeatureStoreOfflinePrefix",
        default_value="reviews-feature-store-" + str(timestamp),
    )

    feature_group_name = ParameterString(
        name="FeatureGroupName", default_value="reviews-feature-group-" + str(timestamp)
    )

    train_instance_type = ParameterString(name="TrainInstanceType", default_value="ml.c5.9xlarge")

    train_instance_count = ParameterInteger(name="TrainInstanceCount", default_value=1)

    #########################
    # PROCESSING STEP
    #########################

    processor = SKLearnProcessor(
        framework_version="0.23-1",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        env={"AWS_DEFAULT_REGION": region},
        max_runtime_in_seconds=7200,
    )

    processing_inputs = [
        ProcessingInput(
            input_name="raw-input-data",
            source=input_data,
            destination="/opt/ml/processing/input/data/",
            s3_data_distribution_type="ShardedByS3Key",
        )
    ]

    processing_outputs = [
        ProcessingOutput(
            output_name="bert-train",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/train",
        ),
        ProcessingOutput(
            output_name="bert-validation",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/validation",
        ),
        ProcessingOutput(
            output_name="bert-test",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/test",
        ),
    ]

    # TODO: Figure out why the Parameters are not resolving properly to their native type when used here.
    # We shouldn't be using `default_value`: the arguments below are fixed to the
    # defaults when the pipeline is built.
    processing_step = ProcessingStep(
        name="Processing",
        processor=processor,
        inputs=processing_inputs,
        outputs=processing_outputs,
        job_arguments=[
            "--train-split-percentage",
            str(train_split_percentage.default_value),
            "--validation-split-percentage",
            str(validation_split_percentage.default_value),
            "--test-split-percentage",
            str(test_split_percentage.default_value),
            "--max-seq-length",
            str(max_seq_length.default_value),
            "--balance-dataset",
            str(balance_dataset.default_value),
            "--feature-store-offline-prefix",
            str(feature_store_offline_prefix.default_value),
            "--feature-group-name",
            str(feature_group_name.default_value),
        ],
        code=os.path.join(BASE_DIR, "preprocess-scikit-text-to-bert-feature-store.py"),
    )
    processed_outputs = processing_step.properties.ProcessingOutputConfig.Outputs

    #########################
    # TRAINING STEP
    #########################

    epochs = ParameterInteger(name="Epochs", default_value=1)

    learning_rate = ParameterFloat(name="LearningRate", default_value=0.00001)

    epsilon = ParameterFloat(name="Epsilon", default_value=0.00000001)

    train_batch_size = ParameterInteger(name="TrainBatchSize", default_value=128)

    validation_batch_size = ParameterInteger(name="ValidationBatchSize", default_value=128)

    test_batch_size = ParameterInteger(name="TestBatchSize", default_value=128)

    train_steps_per_epoch = ParameterInteger(name="TrainStepsPerEpoch", default_value=50)

    validation_steps = ParameterInteger(name="ValidationSteps", default_value=50)

    test_steps = ParameterInteger(name="TestSteps", default_value=50)

    train_volume_size = ParameterInteger(name="TrainVolumeSize", default_value=1024)

    use_xla = ParameterString(
        name="UseXLA",
        default_value="True",
    )

    use_amp = ParameterString(
        name="UseAMP",
        default_value="True",
    )

    freeze_bert_layer = ParameterString(
        name="FreezeBERTLayer",
        default_value="False",
    )

    enable_sagemaker_debugger = ParameterString(
        name="EnableSageMakerDebugger",
        default_value="False",
    )

    enable_checkpointing = ParameterString(
        name="EnableCheckpointing",
        default_value="False",
    )

    enable_tensorboard = ParameterString(
        name="EnableTensorboard",
        default_value="False",
    )

    input_mode = ParameterString(
        name="InputMode",
        default_value="File",
    )

    run_validation = ParameterString(
        name="RunValidation",
        default_value="True",
    )

    run_test = ParameterString(
        name="RunTest",
        default_value="False",
    )

    run_sample_predictions = ParameterString(
        name="RunSamplePredictions",
        default_value="False",
    )

    metrics_definitions = [
        {"Name": "train:loss", "Regex": "loss: ([0-9\\.]+)"},
        {"Name": "train:accuracy", "Regex": "accuracy: ([0-9\\.]+)"},
        {"Name": "validation:loss", "Regex": "val_loss: ([0-9\\.]+)"},
        {"Name": "validation:accuracy", "Regex": "val_accuracy: ([0-9\\.]+)"},
    ]

    model_path = f"s3://{default_bucket}/{base_job_prefix}/output/model"

    estimator = TensorFlow(
        entry_point="tf_bert_reviews.py",
        source_dir=BASE_DIR,
        role=role,
        output_path=model_path,
        instance_count=train_instance_count,
        instance_type=train_instance_type,
        volume_size=train_volume_size,
        py_version="py37",
        framework_version="2.3.1",
        hyperparameters={
            "epochs": epochs,
            "learning_rate": learning_rate,
            "epsilon": epsilon,
            "train_batch_size": train_batch_size,
            "validation_batch_size": validation_batch_size,
            "test_batch_size": test_batch_size,
            "train_steps_per_epoch": train_steps_per_epoch,
            "validation_steps": validation_steps,
            "test_steps": test_steps,
            "use_xla": use_xla,
            "use_amp": use_amp,
            "max_seq_length": max_seq_length,
            "freeze_bert_layer": freeze_bert_layer,
            "enable_sagemaker_debugger": enable_sagemaker_debugger,
            "enable_checkpointing": enable_checkpointing,
            "enable_tensorboard": enable_tensorboard,
            "run_validation": run_validation,
            "run_test": run_test,
            "run_sample_predictions": run_sample_predictions,
        },
        input_mode=input_mode,
        metric_definitions=metrics_definitions,
        # max_run=7200  # 2 hours; currently not set
    )

    training_step = TrainingStep(
        name="Train",
        estimator=estimator,
        inputs={
            "train": TrainingInput(
                s3_data=processed_outputs["bert-train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed_outputs["bert-validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "test": TrainingInput(
                s3_data=processed_outputs["bert-test"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    #########################
    # EVALUATION STEP
    #########################

    evaluation_processor = SKLearnProcessor(
        framework_version="0.23-1",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        env={"AWS_DEFAULT_REGION": region},
        max_runtime_in_seconds=7200,
    )

    evaluation_report = PropertyFile(
        name="EvaluationReport", output_name="metrics", path="evaluation.json"
    )

    evaluation_step = ProcessingStep(
        name="EvaluateModel",
        processor=evaluation_processor,
        code=os.path.join(BASE_DIR, "evaluate_model_metrics.py"),
        inputs=[
            ProcessingInput(
                source=training_step.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/input/model",
            ),
            ProcessingInput(
                source=processing_step.properties.ProcessingInputs[
                    "raw-input-data"
                ].S3Input.S3Uri,
                destination="/opt/ml/processing/input/data",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="metrics",
                s3_upload_mode="EndOfJob",
                source="/opt/ml/processing/output/metrics/",
            ),
        ],
        job_arguments=[
            "--max-seq-length",
            str(max_seq_length.default_value),
        ],
        property_files=[evaluation_report],  # these cause deserialization issues
    )

    # NOTE(review): model_metrics is built here but is not passed to
    # RegisterModel below -- confirm whether that omission is intentional.
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                evaluation_step.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
                    "S3Uri"
                ]
            ),
            content_type="application/json",
        )
    )

    #########################
    ## REGISTER TRAINED MODEL STEP
    #########################

    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval"
    )

    deploy_instance_type = ParameterString(
        name="DeployInstanceType", default_value="ml.m5.4xlarge"
    )

    deploy_instance_count = ParameterInteger(name="DeployInstanceCount", default_value=1)

    inference_image_uri = sagemaker.image_uris.retrieve(
        framework="tensorflow",
        region=region,
        version="2.3.1",
        py_version="py37",
        instance_type=deploy_instance_type,
        image_scope="inference",
    )
    print(inference_image_uri)

    register_step = RegisterModel(
        name="RegisterModel",
        estimator=estimator,
        image_uri=inference_image_uri,  # we have to specify, by default it's using training image
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=[
            deploy_instance_type
        ],  # The JSON spec must be within these instance types or we will see "Instance Type Not Allowed" Exception
        transform_instances=[deploy_instance_type],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
    )

    #########################
    ## CREATE MODEL FOR DEPLOYMENT STEP
    #########################

    model = Model(
        image_uri=inference_image_uri,
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sess,
        role=role,
    )

    create_inputs = CreateModelInput(
        instance_type=deploy_instance_type,
    )

    create_step = CreateModelStep(
        name="CreateModel",
        model=model,
        inputs=create_inputs,
    )

    #########################
    ## CONDITION STEP:  EVALUATE THE MODEL
    #########################

    min_accuracy_value = ParameterFloat(name="MinAccuracyValue", default_value=0.01)

    minimum_accuracy_condition = ConditionGreaterThanOrEqualTo(
        left=JsonGet(
            step=evaluation_step,
            property_file=evaluation_report,
            json_path="metrics.accuracy.value",
        ),
        right=min_accuracy_value,  # accuracy
    )

    minimum_accuracy_condition_step = ConditionStep(
        name="AccuracyCondition",
        conditions=[minimum_accuracy_condition],
        if_steps=[register_step, create_step],  # success, continue with model registration
        else_steps=[],  # fail, end the pipeline
    )

    #########################
    ## CREATE PIPELINE
    #########################

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            input_data,
            processing_instance_count,
            processing_instance_type,
            max_seq_length,
            balance_dataset,
            train_split_percentage,
            validation_split_percentage,
            test_split_percentage,
            feature_store_offline_prefix,
            feature_group_name,
            train_instance_type,
            train_instance_count,
            epochs,
            learning_rate,
            epsilon,
            train_batch_size,
            validation_batch_size,
            test_batch_size,
            train_steps_per_epoch,
            validation_steps,
            test_steps,
            train_volume_size,
            use_xla,
            use_amp,
            freeze_bert_layer,
            enable_sagemaker_debugger,
            enable_checkpointing,
            enable_tensorboard,
            input_mode,
            run_validation,
            run_test,
            run_sample_predictions,
            min_accuracy_value,
            model_approval_status,
            deploy_instance_type,
            deploy_instance_count,
        ],
        steps=[processing_step, training_step, evaluation_step, minimum_accuracy_condition_step],
        sagemaker_session=sess,
    )

    #########################
    ## RETURN PIPELINE
    #########################

    return pipeline
step_publish = ProcessingStep( name="PublishViaAPI", processor=script_publish, inputs=[ ProcessingInput(source=step_eval.properties.ProcessingOutputConfig. Outputs["mlmodel"].S3Output.S3Uri, destination="/opt/ml/processing/mlmodel"), ProcessingInput( source=step_model_card.properties.ProcessingOutputConfig. Outputs["model_card"].S3Output.S3Uri, destination="/opt/ml/processing/model_card") ], code=f"{conf.source_dir}/publish_to_api.py") step_cond = ConditionStep(name="BittymAPcheck", conditions=[cond_map], if_steps=[step_register, step_publish], else_steps=[]) # finally, putting all the steps together in a Pipeline instance pipeline_name = conf.pipeline_name pipeline = Pipeline( name=pipeline_name, parameters=[ conf.processing_train_test_split, conf.processing_instance_count, conf.processing_instance_type, conf.summarizing_instance_count, conf.summarizing_instance_type, conf.training_instance_count, conf.training_instance_type, conf.training_batch_size, conf.training_max_iterations, conf.model_approval_status, conf.input_data, conf.model_approval_map_threshold ], steps=[
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="sagemaker-group-insurance",
    pipeline_name="sagemaker-pipeline-insurance",
    base_job_prefix="sagemaker-featurestore-insurance",
):
    """Gets a SageMaker ML Pipeline instance working on the insurance (WIP) data.

    The pipeline preprocesses the data, trains an XGBoost model, evaluates it
    and conditionally registers the model based on the evaluation report.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When ``None``,
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts. When
            ``None``, the session's default bucket is used.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix for the job names and the S3 model path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # ``default_bucket`` is optional; fall back to the session's bucket so the
    # model path below can always be built.
    artifact_bucket = default_bucket or sagemaker_session.default_bucket()

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="Approved"
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-insurance-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process = ProcessingStep(
        name="PreprocessInsuranceData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input_dataset_1", "41214", "--input_dataset_2", "41215"],
    )

    # NOTE: a feature-store ProcessingStep ("FeatureStoreInsuranceData", running
    # feature_store.py) used to be sketched here inside a string literal; it was
    # never part of the pipeline and has been dropped.

    # training step for generating model artifacts
    model_path = f"s3://{artifact_bucket}/{base_job_prefix}/features"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/insurance-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(objective="reg:tweedie", num_round=50)
    step_train = TrainingStep(
        name="TrainAbaloneModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-wip-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="WipEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateWipModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="register-insurance-model",
        estimator=xgb_train,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    # NOTE(review): registration sits in the *else* branch, so the model is
    # registered only when MSE > 6.0. That looks inverted for a quality gate;
    # behaviour is kept as-is -- confirm whether this is intentional.
    step_cond = ConditionStep(
        name="CheckMSEWipEvaluation",
        conditions=[cond_lte],
        if_steps=[],
        else_steps=[step_register],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def test_workflow_with_clarify(
    data_config,
    data_bias_config,
    model_config,
    model_predicted_label_config,
    pipeline_name,
    role,
    sagemaker_session,
):
    """Run a Clarify processing step followed by a condition on its report.

    The analysis config is written to a temporary file, fed to a
    SageMakerClarifyProcessor step, and a ConditionStep then reads a value
    from the resulting property file through the deprecated ``JsonGet`` form.
    """
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)

    # Assemble the analysis configuration from the individual config objects.
    clarify_config = data_config.get_config()
    clarify_config.update(data_bias_config.get_config())
    threshold, predictor_settings = model_predicted_label_config.get_predictor_config()
    predictor_settings.update(model_config.get_predictor_config())
    clarify_config["methods"] = {
        "post_training_bias": {"methods": "all"},
        "report": {"name": "report", "title": "Analysis Report"},
    }
    clarify_config["predictor"] = predictor_settings
    clarify_config["probability_threshold"] = threshold

    with tempfile.TemporaryDirectory() as workdir:
        # The config file must still exist while the pipeline is created, so
        # the remainder of the test stays inside this block.
        config_path = os.path.join(workdir, "analysis_config.json")
        with open(config_path, "w") as config_file:
            config_file.write(json.dumps(clarify_config))

        config_input = ProcessingInput(
            input_name="analysis_config",
            source=config_path,
            destination="/opt/ml/processing/input/config",
            s3_data_type="S3Prefix",
            s3_input_mode="File",
            s3_compression_type="None",
        )
        dataset_input = ProcessingInput(
            input_name="dataset",
            source=data_config.s3_data_input_path,
            destination="/opt/ml/processing/input/data",
            s3_data_type="S3Prefix",
            s3_input_mode="File",
            s3_data_distribution_type=data_config.s3_data_distribution_type,
            s3_compression_type=data_config.s3_compression_type,
        )
        analysis_output = ProcessingOutput(
            source="/opt/ml/processing/output",
            destination=data_config.s3_output_path,
            output_name="analysis_result",
            s3_upload_mode="EndOfJob",
        )

        clarify_processor = SageMakerClarifyProcessor(
            role="SageMakerRole",
            instance_count=instance_count,
            instance_type=instance_type,
            sagemaker_session=sagemaker_session,
        )
        bias_report = PropertyFile(
            name="BiasOutput",
            output_name="analysis_result",
            path="analysis.json",
        )
        step_process = ProcessingStep(
            name="my-process",
            processor=clarify_processor,
            inputs=[dataset_input, config_input],
            outputs=[analysis_output],
            property_files=[bias_report],
        )

        # Keep the deprecated JsonGet in test to verify it's compatible with new changes
        bias_value = JsonGet(
            step=step_process,
            property_file="BiasOutput",
            json_path="post_training_bias_metrics.facets.F1[0].metrics[0].value",
        )
        bias_condition = ConditionLessThanOrEqualTo(left=bias_value, right=1)
        step_condition = ConditionStep(
            name="bias-condition",
            conditions=[bias_condition],
            if_steps=[],
            else_steps=[],
        )

        pipeline = Pipeline(
            name=pipeline_name,
            parameters=[instance_type, instance_count],
            steps=[step_process, step_condition],
            sagemaker_session=sagemaker_session,
        )

        try:
            pipeline_arn = pipeline.create(role)["PipelineArn"]

            execution = pipeline.start(parameters={})
            description = execution.describe()
            assert description["PipelineArn"] == pipeline_arn

            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                # The step statuses asserted below are what matters here.
                pass

            executed = execution.list_steps()
            assert len(executed) == 2
            assert executed[1]["StepName"] == "my-process"
            assert executed[1]["StepStatus"] == "Succeeded"
            assert executed[0]["StepName"] == "bias-condition"
            assert executed[0]["StepStatus"] == "Succeeded"
            assert executed[0]["Metadata"]["Condition"]["Outcome"] == "True"
        finally:
            # Best-effort cleanup; a failed delete must not mask the test result.
            try:
                pipeline.delete()
            except Exception:
                pass
def test_pipeline_variable_in_pipeline_definition(sagemaker_session):
    """Check how pipeline variables are rendered in the pipeline definition.

    Covers parameters, ``to_string()`` on a parameter / JsonGet / property
    reference, and a ``Join`` used as a FailStep error message.
    """
    param_str = ParameterString(name="MyString", default_value="1")
    param_int = ParameterInteger(name="MyInteger", default_value=3)

    report_file = PropertyFile(
        name="name",
        output_name="result",
        path="output",
    )
    json_get = JsonGet(
        step_name="my-step",
        property_file=report_file,
        json_path="my-json-path",
    )
    step_props = Properties("Steps.MyStep", "DescribeProcessingJobResponse")

    condition = ConditionGreaterThan(left=param_str, right=param_int.to_string())
    message_parts = [
        "Execution failed due to condition check fails, see:",
        json_get.to_string(),
        step_props.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri.to_string(),
        param_int,
    ]
    step_fail = FailStep(
        name="MyFailStep",
        error_message=Join(on=" ", values=message_parts),
    )
    step_cond = ConditionStep(
        name="MyCondStep",
        conditions=[condition],
        if_steps=[],
        else_steps=[step_fail],
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[param_str, param_int],
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
    )

    definition = json.loads(pipeline.definition())

    def stringified(value):
        # Expected rendering of ``<variable>.to_string()``: a Join on "".
        return {"Std:Join": {"On": "", "Values": [value]}}

    int_param_ref = {"Get": "Parameters.MyInteger"}
    expected_json_get = {
        "Std:JsonGet": {
            "PropertyFile": {"Get": "Steps.my-step.PropertyFiles.name"},
            "Path": "my-json-path",
        }
    }
    expected_property_ref = {
        "Get": "Steps.MyStep.ProcessingOutputConfig."
        + "Outputs['MyOutputName'].S3Output.S3Uri"
    }
    expected_fail_step = {
        "Name": "MyFailStep",
        "Type": "Fail",
        "Arguments": {
            "ErrorMessage": {
                "Std:Join": {
                    "On": " ",
                    "Values": [
                        "Execution failed due to condition check fails, see:",
                        stringified(expected_json_get),
                        stringified(expected_property_ref),
                        {"Get": "Parameters.MyInteger"},
                    ],
                }
            }
        },
    }
    expected_condition = {
        "Type": "GreaterThan",
        "LeftValue": {"Get": "Parameters.MyString"},
        "RightValue": stringified(int_param_ref),
    }

    assert definition["Parameters"] == [
        {"Name": "MyString", "Type": "String", "DefaultValue": "1"},
        {"Name": "MyInteger", "Type": "Integer", "DefaultValue": 3},
    ]
    assert len(definition["Steps"]) == 1
    assert definition["Steps"][0] == {
        "Name": "MyCondStep",
        "Type": "Condition",
        "Arguments": {
            "Conditions": [expected_condition],
            "IfSteps": [],
            "ElseSteps": [expected_fail_step],
        },
    }
def test_steps_with_map_params_pipeline(
    sagemaker_session,
    role,
    script_dir,
    pipeline_name,
    region_name,
    athena_dataset_definition,
):
    """Verify map-style property references in a process/train/condition pipeline.

    Checks that ``Outputs["train_data"]`` and ``HyperParameters["batch-size"]``
    are rendered as ``Get`` expressions in the definition, then creates the
    pipeline and checks the returned ARN.
    """
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)
    framework_version = "0.20.0"
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    output_prefix = ParameterString(name="OutputPrefix", default_value="output")
    input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

    sklearn_processor = SKLearnProcessor(
        framework_version=framework_version,
        instance_type=instance_type,
        instance_count=instance_count,
        base_job_name="test-sklearn",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    # Destination of the test split, joined with "/" at execution time.
    test_data_destination = Join(
        on="/",
        values=[
            "s3:/",
            sagemaker_session.default_bucket(),
            "test-sklearn",
            output_prefix,
            ExecutionVariables.PIPELINE_EXECUTION_ID,
        ],
    )
    step_process = ProcessingStep(
        name="my-process",
        display_name="ProcessingStep",
        description="description for Processing step",
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
            ProcessingInput(dataset_definition=athena_dataset_definition),
        ],
        outputs=[
            ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"),
            ProcessingOutput(
                output_name="test_data",
                source="/opt/ml/processing/test",
                destination=test_data_destination,
            ),
        ],
        code=os.path.join(script_dir, "preprocessing.py"),
    )

    sklearn_train = SKLearn(
        framework_version=framework_version,
        entry_point=os.path.join(script_dir, "train.py"),
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        role=role,
        hyperparameters={
            "batch-size": 500,
            "epochs": 5,
        },
    )
    train_data_uri = step_process.properties.ProcessingOutputConfig.Outputs[
        "train_data"
    ].S3Output.S3Uri
    step_train = TrainingStep(
        name="my-train",
        display_name="TrainingStep",
        description="description for Training step",
        estimator=sklearn_train,
        inputs=TrainingInput(s3_data=train_data_uri),
    )

    model = Model(
        image_uri=sklearn_train.image_uri,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    model_inputs = CreateModelInput(
        instance_type="ml.m5.large",
        accelerator_type="ml.eia1.medium",
    )
    step_model = CreateModelStep(
        name="my-model",
        display_name="ModelStep",
        description="description for Model step",
        model=model,
        inputs=model_inputs,
    )

    # Condition step for evaluating model quality and branching execution
    batch_size_condition = ConditionGreaterThanOrEqualTo(
        left=step_train.properties.HyperParameters["batch-size"],
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CustomerChurnAccuracyCond",
        conditions=[batch_size_condition],
        if_steps=[],
        else_steps=[step_model],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_type, instance_count, output_prefix],
        steps=[step_process, step_train, step_cond],
        sagemaker_session=sagemaker_session,
    )

    definition = json.loads(pipeline.definition())
    assert definition["Version"] == "2020-12-01"

    steps = definition["Steps"]
    assert len(steps) == 3

    # Pick up the arguments of the training and condition steps (empty dict
    # when a step of that type is absent).
    arguments_by_type = {"Training": {}, "Condition": {}}
    for step in steps:
        if step["Type"] in arguments_by_type:
            arguments_by_type[step["Type"]] = step["Arguments"]
    training_args = arguments_by_type["Training"]
    condition_args = arguments_by_type["Condition"]

    assert training_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] == {
        "Get": "Steps.my-process.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri"
    }
    assert condition_args["Conditions"][0]["LeftValue"] == {
        "Get": "Steps.my-train.HyperParameters['batch-size']"
    }

    try:
        create_arn = pipeline.create(role)["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
    finally:
        # Best-effort cleanup; a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="CustomerChurnPackageGroup",  # Choose any name
    # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
    pipeline_name="CustomerChurnDemo-p-ewf8t7lvhivm",
    base_job_prefix="CustomerChurn",  # Choose any name
):
    """Gets a SageMaker ML Pipeline instance working on CustomerChurn data.

    Steps: preprocess -> train (XGBoost) -> evaluate -> register the model
    when the reported accuracy is at least 0.8.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts
        model_package_group_name: model package group used for registration.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix for job names and the S3 model path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # Parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1
    )
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    # ModelApprovalStatus can be set to a default of "Approved" if you don't
    # want manual approval.
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="PendingManualApproval",
    )
    # Change this to point to the s3 location of your raw input data.
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://sm-pipelines-demo-data-123456789/churn.txt",
    )

    # Processing step for feature engineering
    preprocess_outputs = [
        ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
        ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
        ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
    ]
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-CustomerChurn-preprocess",  # choose any name
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process = ProcessingStep(
        name="CustomerChurnProcess",  # choose any name
        processor=sklearn_processor,
        outputs=preprocess_outputs,
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # Training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/CustomerChurn-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(
        objective="binary:logistic",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    # Outputs of the processing step, referenced by the downstream steps.
    processed = step_process.properties.ProcessingOutputConfig.Outputs
    step_train = TrainingStep(
        name="CustomerChurnTrain",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=processed["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # Processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-CustomerChurn-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="EvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="CustomerChurnEval",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=processed["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # Metrics attached to the registered model: the evaluation report written
    # to the first output of the evaluation step.
    evaluation_output_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0][
        "S3Output"
    ]["S3Uri"]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(evaluation_output_uri),
            content_type="application/json",
        )
    )

    # Register model step that will be conditionally executed
    step_register = RegisterModel(
        name="CustomerChurnRegisterModel",
        estimator=xgb_train,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # Condition step for evaluating model quality and branching execution
    reported_accuracy = JsonGet(
        step=step_eval,
        property_file=evaluation_report,
        # This should follow the structure of your report_dict defined in the
        # evaluate.py file.
        json_path="binary_classification_metrics.accuracy.value",
    )
    accuracy_condition = ConditionGreaterThanOrEqualTo(  # You can change the condition here
        left=reported_accuracy,
        right=0.8,  # You can change the threshold here
    )
    step_cond = ConditionStep(
        name="CustomerChurnAccuracyCond",
        conditions=[accuracy_condition],
        if_steps=[step_register],
        else_steps=[],
    )

    # Pipeline instance
    return Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="AbalonePackageGroup",
    pipeline_name="AbalonePipeline",
    base_job_prefix="Abalone",
):
    """Gets a SageMaker ML Pipeline instance working on the credit data.

    Steps: preprocess -> train (XGBoost) -> evaluate -> create a data
    baseline, plus a condition that registers the model when the reported
    accuracy is at least 0.7.

    Args:
        region: AWS region to create and run the pipeline. Also used to pick
            the model-monitor image for the baseline step.
        role: IAM role to create and run steps and pipeline. When ``None``,
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts. When
            ``None``, the session's default bucket is used.
        model_package_group_name: model package group used for registration.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix of the S3 location of the baseline output.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # ``default_bucket`` is optional; without this fallback the S3 URIs below
    # would be rendered as "s3://None/...".
    artifact_bucket = default_bucket or sagemaker_session.default_bucket()

    # NOTE(review): processing_instance_type, processing_instance_count,
    # training_instance_type, model_approval_status and input_data are not
    # defined in this function; presumably they are module-level pipeline
    # parameters -- confirm, otherwise this raises NameError.

    # Create cache configuration
    cache_config = CacheConfig(enable_caching=True, expire_after="T30m")

    # Create SKLearn processor object
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name="credit-processing-job",
    )

    # Use the sklearn_processor in a Sagemaker pipelines ProcessingStep
    step_preprocess_data = ProcessingStep(
        name="PreprocessCreditData",
        processor=sklearn_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
        ],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"),
            ProcessingOutput(
                output_name="validation", source="/opt/ml/processing/output/validation"
            ),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/output/test"),
            ProcessingOutput(
                output_name="baseline_with_headers", source="/opt/ml/processing/output/baseline"
            ),
        ],
        code=os.path.join(BASE_DIR, "preprocessing.py"),
    )

    # Where to store the trained model
    model_path = f"s3://{artifact_bucket}/CreditTrain"

    # Fetch container to use for training
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.2-2",
        py_version="py3",
        instance_type=training_instance_type,
    )

    # Create XGBoost estimator object
    xgb_estimator = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        role=role,
        disable_profiler=True,
    )

    # Specify hyperparameters
    xgb_estimator.set_hyperparameters(
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.8,
        objective='binary:logistic',
        num_round=25,
    )

    # Use the xgb_estimator in a Sagemaker pipelines TrainingStep.
    # NOTE how the input to the training job directly references the output of the previous step.
    step_train_model = TrainingStep(
        name="TrainCreditModel",
        estimator=xgb_estimator,
        cache_config=cache_config,
        inputs={
            "train": TrainingInput(
                s3_data=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # Create ScriptProcessor object.
    evaluate_model_processor = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name="script-credit-eval",
        role=role,
    )

    # Create a PropertyFile
    # We use a PropertyFile to be able to reference outputs from a processing step,
    # for instance to use in a condition step, which we'll see later on.
    # For more information, visit
    # https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-propertyfile.html
    evaluation_report = PropertyFile(
        name="EvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )

    # Use the evaluate_model_processor in a Sagemaker pipelines ProcessingStep.
    step_evaluate_model = ProcessingStep(
        name="EvaluateCreditModel",
        processor=evaluate_model_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(
                source=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluation.py"),
        property_files=[evaluation_report],
    )

    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_evaluate_model.arguments["ProcessingOutputConfig"]["Outputs"][0][
                    "S3Output"
                ]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    # Create a RegisterModel step, which registers your model with Sagemaker Model Registry.
    step_register_model = RegisterModel(
        name="RegisterCreditModel",
        estimator=xgb_estimator,
        model_data=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge", "ml.m5.large"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # Create Processor object using the model monitor image. The image is
    # looked up in the pipeline's own region rather than a hard-coded one.
    baseline_processor = sagemaker.processing.Processor(
        base_job_name="credit-risk-baseline-processor",
        image_uri=sagemaker.image_uris.retrieve(framework='model-monitor', region=region),
        role=role,
        instance_count=1,
        instance_type=processing_instance_type,
        env={
            "dataset_format": "{\"csv\": {\"header\": true} }",
            "dataset_source": "/opt/ml/processing/sm_input",
            "output_path": "/opt/ml/processing/sm_output",
            "publish_cloudwatch_metrics": "Disabled",
        },
    )

    # Create a Sagemaker Pipeline step, using the baseline_processor.
    step_create_data_baseline = ProcessingStep(
        name="CreateModelQualityBaseline",
        processor=baseline_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(
                source=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
                    "baseline_with_headers"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/sm_input",
            )
        ],
        outputs=[
            ProcessingOutput(
                source="/opt/ml/processing/sm_output",
                destination="s3://{}/{}/baseline".format(artifact_bucket, base_job_prefix),
                output_name="baseline_result",
            )
        ],
    )

    # Create Condition
    cond_gte = ConditionGreaterThanOrEqualTo(
        left=JsonGet(
            step=step_evaluate_model,
            property_file=evaluation_report,
            json_path="binary_classification_metrics.accuracy.value",
        ),
        right=0.7,
    )

    # Create a Sagemaker Pipelines ConditionStep, using the condition we just created.
    step_cond = ConditionStep(
        name="AccuracyCondition",
        conditions=[cond_gte],
        if_steps=[step_register_model],
        else_steps=[],
    )

    from sagemaker.workflow.pipeline import Pipeline

    # Create a Sagemaker Pipeline
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[
            step_preprocess_data,
            step_train_model,
            step_evaluate_model,
            step_create_data_baseline,
            step_cond,
        ],
    )
    return pipeline
def get_pipeline(
    region,
    security_group_ids,
    subnets,
    processing_role=None,
    training_role=None,
    data_bucket=None,
    model_bucket=None,
    model_package_group_name="AbalonePackageGroup",
    pipeline_name="AbalonePipeline",
    base_job_prefix="Abalone",
):
    """Gets a SageMaker ML Pipeline instance working on abalone data inside a VPC.

    Args:
        region: AWS region to create and run the pipeline.
        security_group_ids: comma-separated security group IDs for the jobs.
        subnets: comma-separated subnet IDs for the jobs.
        processing_role: IAM role to create and run processing steps. When
            ``None``, the execution role of the session is used.
        training_role: IAM role to create and run training steps. When
            ``None``, the execution role of the session is used.
        data_bucket: the bucket to use for storing the artifacts
        model_bucket: the bucket the trained model is written to. When
            ``None``, the session's default bucket is used.
        model_package_group_name: model package group used for registration.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix for job names and the S3 model path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, data_bucket)

    if processing_role is None:
        processing_role = sagemaker.session.get_execution_role(sagemaker_session)
    if training_role is None:
        training_role = sagemaker.session.get_execution_role(sagemaker_session)
    if model_bucket is None:
        model_bucket = sagemaker_session.default_bucket()

    print(f"Creating the pipeline '{pipeline_name}':")
    # Adjacent f-strings instead of a backslash continuation, so no source
    # indentation leaks into the printed text.
    print(
        f"Parameters:{region}\n{security_group_ids}\n{subnets}\n{processing_role}\n"
        f"{training_role}\n{data_bucket}\n{model_bucket}\n{model_package_group_name}\n"
        f"{pipeline_name}\n{base_job_prefix}"
    )

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval"
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://{sagemaker_session.default_bucket()}/datasets/abalone-dataset.csv",
    )

    # configure network for encryption, network isolation and VPC configuration
    # Since the preprocessor job takes the data from S3, enable_network_isolation must be set to False
    # see https://github.com/aws/amazon-sagemaker-examples/issues/1689
    # IDs are stripped (and empty entries dropped) so "a, b" style input works.
    network_config = NetworkConfig(
        enable_network_isolation=False,
        security_group_ids=[
            group.strip() for group in security_group_ids.split(",") if group.strip()
        ],
        subnets=[subnet.strip() for subnet in subnets.split(",") if subnet.strip()],
        encrypt_inter_container_traffic=True,
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess",
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
    )
    step_process = ProcessingStep(
        name="PreprocessAbaloneData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    model_path = f"s3://{model_bucket}/{base_job_prefix}/AbaloneTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/abalone-train",
        sagemaker_session=sagemaker_session,
        role=training_role,
        subnets=network_config.subnets,
        security_group_ids=network_config.security_group_ids,
        encrypt_inter_container_traffic=True,
        enable_network_isolation=False,
    )
    xgb_train.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="TrainAbaloneModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-abalone-eval",
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
    )
    evaluation_report = PropertyFile(
        name="AbaloneEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateAbaloneModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    # There is a bug in the RegisterModel implementation.
    # The RegisterModel step is implemented in the SDK as two steps, a
    # _RepackModelStep and a _RegisterModelStep. The _RepackModelStep runs a
    # SKLearn training step in order to repack the model.tar.gz to include any
    # custom inference code in the archive. The _RegisterModelStep then
    # registers the repacked model.
    #
    # The problem is that the _RepackModelStep does not propagate the VPC
    # configuration from the Estimator object:
    # https://github.com/aws/sagemaker-python-sdk/blob/cdb633b3ab02398c3b77f5ecd2c03cdf41049c78/src/sagemaker/workflow/_utils.py#L88
    #
    # This causes an AccessDenied exception because the repacker cannot access
    # the S3 bucket (all access which is not via the VPC endpoint is blocked by
    # the bucket policy).
    #
    # The issue is opened against the SageMaker python SDK:
    # https://github.com/aws/sagemaker-python-sdk/issues/2302
    vpc_config = {
        "Subnets": network_config.subnets,
        "SecurityGroupIds": network_config.security_group_ids,
    }

    step_register = RegisterModel(
        name="RegisterAbaloneModel",
        estimator=xgb_train,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
        vpc_config_override=vpc_config,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSEAbaloneEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def get_pipeline(
    region,
    sagemaker_session=None,
    role=None,
    default_bucket=None,
    model_package_group_name="sts-sklearn-grp",
    pipeline_name="stsPipeline",
    base_job_prefix="sts",
) -> Pipeline:
    """Gets a SageMaker ML Pipeline instance working on STS data.

    The pipeline preprocesses the input file, trains an SKLearn estimator and
    evaluates the resulting model. When the evaluation report's
    ``regression_metrics.mse.value`` is at most 6.0, the model is registered
    and the monitoring-baseline processing step is run.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_session: session shared by every step of the pipeline. When
            ``None``, a session is created with
            ``get_session(region, default_bucket)``.
        role: IAM role to create and run steps and pipeline. When ``None``,
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts. Only
            consulted when the session is created by this function.
        model_package_group_name: model package group the model is
            registered into.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix used for job names and the training output
            path.

    Returns:
        an instance of a pipeline
    """
    # Instance types allowed:
    # ml.r5.12xlarge, ml.m5.4xlarge, ml.p2.xlarge, ml.m4.16xlarge,
    # ml.r5.24xlarge, ml.t3.xlarge, ml.r5.16xlarge, ml.m5.large,
    # ml.p3.16xlarge, ml.p2.16xlarge, ml.c4.2xlarge, ml.c5.2xlarge,
    # ml.c4.4xlarge, ml.c5.4xlarge, ml.c4.8xlarge, ml.c5.9xlarge,
    # ml.c5.xlarge, ml.c4.xlarge, ml.t3.2xlarge, ml.t3.medium,
    # ml.c5.18xlarge, ml.r5.2xlarge, ml.p3.2xlarge, ml.m5.xlarge,
    # ml.m4.10xlarge, ml.r5.4xlarge, ml.m5.12xlarge, ml.m4.xlarge,
    # ml.t3.large, ml.m5.24xlarge, ml.m4.2xlarge, ml.m5.2xlarge,
    # ml.p2.8xlarge, ml.r5.8xlarge, ml.r5.xlarge, ml.r5.large,
    # ml.p3.8xlarge, ml.m4.4xlarge
    # see https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/

    # Honor a caller-supplied session; previously it was always replaced by a
    # freshly created one, which silently discarded the argument.
    if sagemaker_session is None:
        sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1
    )
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    # as of free tier of 50 hours of m4.xlarge or m5.xlarge instances
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="Approved"
    )

    # preprocess input data
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://sts-datwit-dataset/stsmsrpc.txt",
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-sts-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_preprocess = ProcessingStep(
        name="PreprocessSTSData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/stsTrain"
    # The same SKLearn image is reused below by the script processors.
    image_uri = sagemaker.image_uris.retrieve(
        framework="sklearn",
        region=region,
        version="0.23-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    sklearn_estimator = SKLearn(
        entry_point="training.py",
        source_dir=BASE_DIR,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        framework_version="0.23-1",
        py_version="py3",
        base_job_name=f"{base_job_prefix}/sts-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_train = TrainingStep(
        name="TrainSTSModel",
        estimator=sklearn_estimator,
        inputs={
            "train": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-sts-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="stsEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateSTSModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # setup model quality monitoring baseline data
    script_process_baseline_data = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/baseline",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process_baseline_data = ProcessingStep(
        name="SetupMonitoringData",
        processor=script_process_baseline_data,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/validation",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="validate", source="/opt/ml/processing/validate"),
        ],
        code=os.path.join(BASE_DIR, "baseline.py"),
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            # evaluation.json is read from the first (only) output of step_eval
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="RegisterSTSModel",
        estimator=sklearn_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSESTSEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register, step_process_baseline_data],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_preprocess, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def test_end_to_end_pipeline_successful_execution(
    sagemaker_session, region_name, role, pipeline_name, wait=False
):
    """Create and start the end-to-end abalone pipeline.

    Stages the abalone training and batch-transform data sets in the session's
    default bucket, builds a pipeline made of processing, training, evaluation
    and a condition step (register / create model / transform), creates it and
    starts one execution.

    Args:
        sagemaker_session: session used for S3 access and by every step.
        region_name: region used to pick the seed-code bucket and the image.
        role: IAM role used for the steps and for creating the pipeline.
        pipeline_name: name of the pipeline; also used as a prefix for S3 keys
            and job names.
        wait: when true, block until the started execution finishes.

    Returns:
        The ARN of the started pipeline execution.
    """
    model_package_group_name = f"{pipeline_name}ModelPackageGroup"
    data_path = os.path.join(DATA_DIR, "workflow")
    default_bucket = sagemaker_session.default_bucket()
    s3 = sagemaker_session.boto_session.resource("s3")
    seed_bucket_name = f"sagemaker-servicecatalog-seedcode-{region_name}"
    base_uri = f"s3://{default_bucket}/{pipeline_name}"

    def _stage(file_name):
        # Download one seed data set locally, then upload it to our bucket.
        local_path = os.path.join(data_path, file_name)
        s3.Bucket(seed_bucket_name).download_file(f"dataset/{file_name}", local_path)
        with open(local_path) as handle:
            contents = handle.read()
        return S3Uploader.upload_string_as_file_body(
            body=contents,
            desired_s3_uri=f"{base_uri}/{file_name}",
            sagemaker_session=sagemaker_session,
        )

    # input data first, batch transform data second
    input_data_uri = _stage("abalone-dataset.csv")
    batch_data_uri = _stage("abalone-dataset-batch")

    # pipeline parameters
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="Approved")
    input_data = ParameterString(name="InputData", default_value=input_data_uri)
    batch_data = ParameterString(name="BatchData", default_value=batch_data_uri)

    # processing step
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{pipeline_name}-process",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    processing_outputs = [
        ProcessingOutput(output_name=channel, source=f"/opt/ml/processing/{channel}")
        for channel in ("train", "validation", "test")
    ]
    step_process = ProcessingStep(
        name="AbaloneProcess",
        processor=sklearn_processor,
        inputs=[ProcessingInput(source=input_data, destination="/opt/ml/processing/input")],
        outputs=processing_outputs,
        code=os.path.join(data_path, "abalone/preprocessing.py"),
    )
    processed = step_process.properties.ProcessingOutputConfig.Outputs

    # training step
    image_uri = image_uris.retrieve(
        framework="xgboost",
        region=region_name,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=f"s3://{default_bucket}/{pipeline_name}Train",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    xgb_train.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="AbaloneTrain",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=processed["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )
    model_artifacts = step_train.properties.ModelArtifacts.S3ModelArtifacts

    # evaluation step
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{pipeline_name}-eval",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    evaluation_report = PropertyFile(
        name="EvaluationReport", output_name="evaluation", path="evaluation.json"
    )
    step_eval = ProcessingStep(
        name="AbaloneEval",
        processor=script_eval,
        inputs=[
            ProcessingInput(source=model_artifacts, destination="/opt/ml/processing/model"),
            ProcessingInput(
                source=processed["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(data_path, "abalone/evaluation.py"),
        property_files=[evaluation_report],
    )

    # create model step
    model = Model(
        image_uri=image_uri,
        model_data=model_artifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_create_model = CreateModelStep(
        name="AbaloneCreateModel",
        model=model,
        inputs=CreateModelInput(
            instance_type="ml.m5.large",
            accelerator_type="ml.eia1.medium",
        ),
    )

    # transform step
    transformer = Transformer(
        model_name=step_create_model.properties.ModelName,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        output_path=f"s3://{default_bucket}/{pipeline_name}Transform",
        sagemaker_session=sagemaker_session,
    )
    step_transform = TransformStep(
        name="AbaloneTransform",
        transformer=transformer,
        inputs=TransformInput(data=batch_data),
    )

    # register model step
    evaluation_s3_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
        "S3Uri"
    ]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(evaluation_s3_uri),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="AbaloneRegisterModel",
        estimator=xgb_train,
        model_data=model_artifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step: only register/deploy/transform when MSE <= 20.0
    mse_value = JsonGet(
        step_name=step_eval.name,
        property_file=evaluation_report,
        json_path="regression_metrics.mse.value",
    )
    step_cond = ConditionStep(
        name="AbaloneMSECond",
        conditions=[ConditionLessThanOrEqualTo(left=mse_value, right=20.0)],
        if_steps=[step_register, step_create_model, step_transform],
        else_steps=[],
    )

    # pipeline definition
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
            batch_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )

    pipeline.create(role)
    execution = pipeline.start()
    # Capture the ARN before the optional wait, as the execution starts.
    started_arn = execution.arn
    if wait:
        execution.wait()
    return started_arn
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="TestPackageGroup",
    pipeline_name="TestPipeline",
    base_job_prefix="Test",
):
    """Build a SageMaker ML Pipeline instance working on abalone data.

    The pipeline runs preprocessing, XGBoost training and evaluation, then
    registers the model when the evaluation report's
    ``regression_metrics.mse.value`` is at most 6.0.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When ``None``,
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts.
        model_package_group_name: model package group the model is
            registered into.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix used for job names and the training output
            path.

    Returns:
        an instance of a pipeline
    """
    session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(session)

    # pipeline execution parameters
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1
    )
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval"
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv",
    )

    # feature-engineering processing step
    preprocessor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-test-preprocess",
        sagemaker_session=session,
        role=role,
    )
    step_process = ProcessingStep(
        name="PreprocessTestData",
        processor=preprocessor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )
    processed = step_process.properties.ProcessingOutputConfig.Outputs

    # training step producing the model artifacts
    model_path = f"s3://{session.default_bucket()}/{base_job_prefix}/TestTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/test-train",
        sagemaker_session=session,
        role=role,
    )
    hyperparameters = {
        "objective": "reg:linear",
        "num_round": 50,
        "max_depth": 5,
        "eta": 0.2,
        "gamma": 4,
        "min_child_weight": 6,
        "subsample": 0.7,
        "silent": 0,
    }
    xgb_train.set_hyperparameters(**hyperparameters)
    step_train = TrainingStep(
        name="TrainTestModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=processed["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )
    model_artifacts = step_train.properties.ModelArtifacts.S3ModelArtifacts

    # evaluation processing step
    evaluator = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-test-eval",
        sagemaker_session=session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="TestEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateTestModel",
        processor=evaluator,
        inputs=[
            ProcessingInput(source=model_artifacts, destination="/opt/ml/processing/model"),
            ProcessingInput(
                source=processed["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # model registration, executed only when the condition below holds
    evaluation_s3_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
        "S3Uri"
    ]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(evaluation_s3_uri),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="RegisterTestModel",
        estimator=xgb_train,
        model_data=model_artifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # branch on model quality: register only when MSE <= 6.0
    reported_mse = JsonGet(
        step=step_eval,
        property_file=evaluation_report,
        json_path="regression_metrics.mse.value",
    )
    step_cond = ConditionStep(
        name="CheckMSETestEvaluation",
        conditions=[ConditionLessThanOrEqualTo(left=reported_mse, right=6.0)],
        if_steps=[step_register],
        else_steps=[],
    )

    # assemble the pipeline
    return Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=session,
    )
def test_ppl_var_to_string_and_add(sagemaker_session, role, pipeline_name):
    """Check ``to_string()`` on pipeline variables inside a ``Join`` message.

    A condition step compares a string parameter with the stringified integer
    parameter; a fail step then reports the condition outcome and both
    parameter values through a joined error message. The pipeline is executed
    twice (default parameters, then ``MyInteger`` overridden to 0) and the
    fail step's ``FailureReason`` is asserted for each run.
    """
    param_str = ParameterString(name="MyString", default_value="1")
    param_int = ParameterInteger(name="MyInteger", default_value=3)

    step_cond = ConditionStep(
        name="CondStep",
        conditions=[ConditionGreaterThan(left=param_str, right=param_int.to_string())],
        if_steps=[],
        else_steps=[],
    )
    failure_message = Join(
        on=" ",
        values=[
            "condition greater than check return:",
            step_cond.properties.Outcome.to_string(),
            "and left side param str is",
            param_str,
            "and right side param int is",
            param_int,
        ],
    )
    step_fail = FailStep(name="FailStep", error_message=failure_message)
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[param_str, param_int],
        steps=[step_cond, step_fail],
        sagemaker_session=sagemaker_session,
    )

    def _wait_and_verify(run, expected_reason):
        # The execution is expected to fail, so a WaiterError is not an error.
        try:
            run.wait(delay=30, max_attempts=60)
        except WaiterError:
            pass
        steps = run.list_steps()
        assert len(steps) == 2
        for step in steps:
            if step["StepName"] == "CondStep":
                assert step["StepStatus"] == "Succeeded"
            else:
                assert step["StepName"] == "FailStep"
                assert step["StepStatus"] == "Failed"
                assert step["FailureReason"] == expected_reason

    try:
        pipeline_arn = pipeline.create(role)["PipelineArn"]

        execution = pipeline.start()
        assert execution.describe()["PipelineArn"] == pipeline_arn
        _wait_and_verify(
            execution,
            "condition greater than check return: false "
            "and left side param str is 1 and right side param int is 3",
        )

        # Update int param to update cond step outcome
        execution = pipeline.start(parameters={"MyInteger": 0})
        _wait_and_verify(
            execution,
            "condition greater than check return: true "
            "and left side param str is 1 and right side param int is 0",
        )
    finally:
        # Best-effort cleanup; a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass