def test_register_model_tf(estimator_tf, model_metrics, drift_check_baselines):
    """Check the request ``RegisterModel`` builds for the ``estimator_tf`` fixture.

    The step is configured with model metrics and drift-check baselines; the
    result of ``request_dicts()`` and the expected payload are both passed
    through ``ordered`` before being compared.
    """
    step = RegisterModel(
        name="RegisterModelStep",
        estimator=estimator_tf,
        model_data=f"s3://{BUCKET}/model.tar.gz",
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        approval_status="Approved",
        description="description",
    )

    # Expected payload, assembled piece by piece for readability.
    expected_inference_spec = {
        "Containers": [
            {
                "Image": "763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-inference:1.15.2-cpu",
                "ModelDataUrl": f"s3://{BUCKET}/model.tar.gz",
            }
        ],
        "SupportedContentTypes": ["content_type"],
        "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
        "SupportedResponseMIMETypes": ["response_type"],
        "SupportedTransformInstanceTypes": ["transform_instance"],
    }
    expected_model_metrics = {
        "Bias": {},
        "Explainability": {},
        "ModelQuality": {
            "Statistics": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/metrics.csv",
            },
        },
    }
    expected_drift_baselines = {
        "ModelQuality": {
            "Constraints": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/constraints_metrics.csv",
            }
        }
    }
    expected_request = {
        "Name": "RegisterModelStep",
        "Type": "RegisterModel",
        "Description": "description",
        "Arguments": {
            "InferenceSpecification": expected_inference_spec,
            "ModelApprovalStatus": "Approved",
            "ModelMetrics": expected_model_metrics,
            "DriftCheckBaselines": expected_drift_baselines,
            "ModelPackageDescription": "description",
            "ModelPackageGroupName": "mpg",
        },
    }

    assert ordered(step.request_dicts()) == ordered([expected_request])
# Example #2
# 0
def test_register_model(estimator, model_metrics):
    """Check the request ``RegisterModel`` builds with ``depends_on`` and ``tags`` set.

    The result of ``request_dicts()`` and the expected payload are both passed
    through ``ordered`` before being compared.
    """
    step = RegisterModel(
        name="RegisterModelStep",
        estimator=estimator,
        model_data=f"s3://{BUCKET}/model.tar.gz",
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        approval_status="Approved",
        description="description",
        depends_on=["TestStep"],
        tags=[{"Key": "myKey", "Value": "myValue"}],
    )

    # Expected payload, assembled piece by piece for readability.
    expected_inference_spec = {
        "Containers": [
            {
                "Image": "fakeimage",
                "ModelDataUrl": f"s3://{BUCKET}/model.tar.gz",
            }
        ],
        "SupportedContentTypes": ["content_type"],
        "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
        "SupportedResponseMIMETypes": ["response_type"],
        "SupportedTransformInstanceTypes": ["transform_instance"],
    }
    expected_model_metrics = {
        "ModelQuality": {
            "Statistics": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/metrics.csv",
            },
        },
    }
    expected_request = {
        "Name": "RegisterModelStep",
        "Type": "RegisterModel",
        "DependsOn": ["TestStep"],
        "Arguments": {
            "InferenceSpecification": expected_inference_spec,
            "ModelApprovalStatus": "Approved",
            "ModelMetrics": expected_model_metrics,
            "ModelPackageDescription": "description",
            "ModelPackageGroupName": "mpg",
            "Tags": [{"Key": "myKey", "Value": "myValue"}],
        },
    }

    assert ordered(step.request_dicts()) == ordered([expected_request])
# Example #3
# 0
def get_pipeline(region, role, image_uri, model_path):
    """Build the two-step ``iris-model-pipeline`` (train, then register).

    Args:
        region: Region handed to ``get_session`` to obtain the SageMaker
            session.
        role: IAM role used by the estimator. When ``None``, the role is
            looked up with ``sagemaker.session.get_execution_role``.
        image_uri: Container image used by the training estimator.
        model_path: Value passed as the estimator's ``output_path``.

    Returns:
        The assembled ``Pipeline`` object. This function only constructs it;
        nothing here creates or starts the pipeline.
    """
    session = get_session(region)
    if role is None:
        role = sagemaker.session.get_execution_role(session)

    # Only the two data locations are pipeline parameters. The image and the
    # output path come from the function arguments and are fixed when the
    # definition is built, so no parameter objects are created for them.
    train_data_param = ParameterString(name='train-data')
    validation_data_param = ParameterString(name='validation-data')

    estimator = Estimator(image_uri=image_uri,
                          instance_type='ml.m5.xlarge',
                          instance_count=1,
                          output_path=model_path,
                          sagemaker_session=session,
                          role=role)

    ### Your Pipeline definition goes here ....
    ###########################################

    step_train = TrainingStep(name="iris-model-train",
                              estimator=estimator,
                              inputs={
                                  "train":
                                  TrainingInput(s3_data=train_data_param,
                                                content_type='text/csv'),
                                  "validation":
                                  TrainingInput(s3_data=validation_data_param,
                                                content_type='text/csv')
                              })

    # The registration step consumes the artifact produced by the training
    # step, which is what links the two steps together.
    step_register = RegisterModel(
        name='iris-model-register',
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        estimator=estimator,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name='iris-model')

    pipeline = Pipeline(name='iris-model-pipeline',
                        steps=[step_train, step_register],
                        parameters=[train_data_param, validation_data_param],
                        sagemaker_session=session)

    ### end of Pipeline definition
    ###########################################

    return pipeline
def test_model_registration_with_tensorflow_model_with_pipeline_model(
        sagemaker_session, role, tf_full_version, tf_full_py_version,
        pipeline_name, region_name):
    """Run a train + register pipeline where the registered model is a ``PipelineModel``.

    Uploads the MNIST training data, builds a pipeline with a TensorFlow
    training step and a ``RegisterModel`` step wrapping a single-model
    ``PipelineModel``, creates and starts it, and expects three execution
    steps that all report ``Succeeded``. The pipeline is deleted at the end
    on a best-effort basis.
    """
    mnist_dir = os.path.join(DATA_DIR, "tensorflow_mnist")
    training_script = os.path.join(mnist_dir, "mnist_v2.py")
    uploaded_data_uri = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "data"),
        key_prefix="integ-test-data/tf-scriptmode/mnist/training",
    )
    training_input = TrainingInput(s3_data=uploaded_data_uri)

    # Pipeline parameters consumed by the estimator.
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(
        name="InstanceType", default_value="ml.m5.xlarge"
    )

    tf_estimator = TensorFlow(
        entry_point=training_script,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        framework_version=tf_full_version,
        py_version=tf_full_py_version,
        sagemaker_session=sagemaker_session,
    )
    step_train = TrainingStep(
        name="MyTrain", estimator=tf_estimator, inputs=training_input
    )

    tf_model = TensorFlowModel(
        entry_point=training_script,
        framework_version="2.4",
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        role=role,
        sagemaker_session=sagemaker_session,
    )
    wrapped_model = PipelineModel(
        name="MyModelPipeline",
        models=[tf_model],
        role=role,
        sagemaker_session=sagemaker_session,
    )

    step_register_model = RegisterModel(
        name="MyRegisterModel",
        model=wrapped_model,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=f"{pipeline_name}TestModelPackageGroup",
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type],
        steps=[step_train, step_register_model],
        sagemaker_session=sagemaker_session,
    )

    try:
        pipeline_arn = pipeline.create(role)["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            pipeline_arn,
        )

        attempts = retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        )
        for _ in attempts:
            execution = pipeline.start(parameters={})
            assert re.match(
                rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )

            # A waiter timeout is tolerated; the step statuses checked below
            # decide whether the run passed.
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass

            step_records = execution.list_steps()
            assert len(step_records) == 3
            for record in step_records:
                assert record["StepStatus"] == "Succeeded"
            break
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass
def test_model_registration_with_drift_check_baselines(
    sagemaker_session,
    role,
    pipeline_name,
):
    """Register a model with metrics and drift-check baselines through a pipeline.

    Uploads a model archive and a small JSON metrics document to S3, builds a
    single-step ``RegisterModel`` pipeline that references them through
    pipeline parameters, runs it, and then checks the resulting model package
    description for the metrics, baselines and customer metadata. The
    pipeline is deleted at the end on a best-effort basis.
    """
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(
        name="InstanceType", default_value="ml.m5.xlarge"
    )

    # upload model data to s3
    local_model_archive = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
    model_destination = "s3://{}/{}/input/model/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("model"),
    )
    model_uri = S3Uploader.upload(
        local_model_archive,
        model_destination,
        sagemaker_session=sagemaker_session,
    )
    model_uri_param = ParameterString(name="model_uri", default_value=model_uri)

    # upload metrics to s3
    metrics_document = (
        '{"regression_metrics": {"mse": {"value": 4.925353410353891, '
        '"standard_deviation": 2.219186917819692}}}'
    )
    metrics_destination = "s3://{}/{}/input/metrics/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("metrics"),
    )
    metrics_uri = S3Uploader.upload_string_as_file_body(
        body=metrics_document,
        desired_s3_uri=metrics_destination,
        sagemaker_session=sagemaker_session,
    )
    metrics_uri_param = ParameterString(
        name="metrics_uri", default_value=metrics_uri
    )

    # Every metrics/baseline entry points at the same uploaded JSON document;
    # these factories build a fresh source object for each argument.
    def json_metrics_source():
        return MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        )

    def json_file_source():
        return FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        )

    model_metrics = ModelMetrics(
        bias=json_metrics_source(),
        explainability=json_metrics_source(),
        bias_pre_training=json_metrics_source(),
        bias_post_training=json_metrics_source(),
    )
    drift_check_baselines = DriftCheckBaselines(
        model_statistics=json_metrics_source(),
        model_constraints=json_metrics_source(),
        model_data_statistics=json_metrics_source(),
        model_data_constraints=json_metrics_source(),
        bias_config_file=json_file_source(),
        bias_pre_training_constraints=json_metrics_source(),
        bias_post_training_constraints=json_metrics_source(),
        explainability_constraints=json_metrics_source(),
        explainability_config_file=json_file_source(),
    )
    customer_metadata_properties = {"key1": "value1"}

    xgb_estimator = XGBoost(
        entry_point="training.py",
        source_dir=os.path.join(DATA_DIR, "sip"),
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )
    step_register = RegisterModel(
        name="MyRegisterModelStep",
        estimator=xgb_estimator,
        model_data=model_uri_param,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="testModelPackageGroup",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        customer_metadata_properties=customer_metadata_properties,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            model_uri_param,
            metrics_uri_param,
            instance_type,
            instance_count,
        ],
        steps=[step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        create_arn = pipeline.create(role)["PipelineArn"]

        attempts = retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        )
        for _ in attempts:
            execution = pipeline.start(
                parameters={"model_uri": model_uri, "metrics_uri": metrics_uri}
            )
            description = execution.describe()
            assert description["PipelineArn"] == create_arn

            # A waiter timeout is tolerated; the step record checked below
            # decides whether the run passed.
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass

            step_records = execution.list_steps()
            assert len(step_records) == 1
            register_record = step_records[0]

            # A reported failure reason triggers another attempt instead of
            # failing the test outright.
            failure_reason = register_record.get("FailureReason", "")
            if failure_reason != "":
                logging.error(
                    f"Pipeline execution failed with error: {failure_reason}."
                    " Retrying.."
                )
                continue

            assert register_record["StepStatus"] == "Succeeded"
            assert register_record["StepName"] == "MyRegisterModelStep"

            package = sagemaker_session.sagemaker_client.describe_model_package(
                ModelPackageName=register_record["Metadata"]["RegisterModel"]["Arn"]
            )

            explainability_report = package["ModelMetrics"]["Explainability"]["Report"]
            assert explainability_report["ContentType"] == "application/json"

            baselines = package["DriftCheckBaselines"]
            assert (
                baselines["Bias"]["PreTrainingConstraints"]["ContentType"]
                == "application/json"
            )
            assert (
                baselines["Explainability"]["Constraints"]["ContentType"]
                == "application/json"
            )
            assert (
                baselines["ModelQuality"]["Statistics"]["ContentType"]
                == "application/json"
            )
            assert (
                baselines["ModelDataQuality"]["Statistics"]["ContentType"]
                == "application/json"
            )
            assert package["CustomerMetadataProperties"] == customer_metadata_properties
            break
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass
def test_register_model(estimator, model_metrics, drift_check_baselines):
    """Check the request ``RegisterModel`` builds with a custom image and display name.

    The step is configured with an explicit ``image_uri``, drift-check
    baselines, ``display_name``, ``depends_on`` and ``tags``. The result of
    ``request_dicts()`` and the expected payload are both passed through
    ``ordered`` before being compared.
    """
    step = RegisterModel(
        name="RegisterModelStep",
        estimator=estimator,
        model_data=f"s3://{BUCKET}/model.tar.gz",
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        image_uri="012345678901.dkr.ecr.us-west-2.amazonaws.com/my-custom-image-uri",
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        approval_status="Approved",
        description="description",
        display_name="RegisterModelStep",
        depends_on=["TestStep"],
        tags=[{"Key": "myKey", "Value": "myValue"}],
    )

    # Expected payload, assembled piece by piece for readability.
    expected_inference_spec = {
        "Containers": [
            {
                "Image": "012345678901.dkr.ecr.us-west-2.amazonaws.com/my-custom-image-uri",
                "ModelDataUrl": f"s3://{BUCKET}/model.tar.gz",
            }
        ],
        "SupportedContentTypes": ["content_type"],
        "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
        "SupportedResponseMIMETypes": ["response_type"],
        "SupportedTransformInstanceTypes": ["transform_instance"],
    }
    expected_model_metrics = {
        "Bias": {},
        "Explainability": {},
        "ModelQuality": {
            "Statistics": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/metrics.csv",
            },
        },
    }
    expected_drift_baselines = {
        "ModelQuality": {
            "Constraints": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/constraints_metrics.csv",
            }
        }
    }
    expected_request = {
        "Name": "RegisterModelStep",
        "Type": "RegisterModel",
        "DependsOn": ["TestStep"],
        "DisplayName": "RegisterModelStep",
        "Description": "description",
        "Arguments": {
            "InferenceSpecification": expected_inference_spec,
            "ModelApprovalStatus": "Approved",
            "ModelMetrics": expected_model_metrics,
            "DriftCheckBaselines": expected_drift_baselines,
            "ModelPackageDescription": "description",
            "ModelPackageGroupName": "mpg",
            "Tags": [{"Key": "myKey", "Value": "myValue"}],
        },
    }

    assert ordered(step.request_dicts()) == ordered([expected_request])
def get_pipeline(
        region,
        role=None,
        default_bucket=None,
        model_package_group_name="CustomerChurnPackageGroup",  # Choose any name
        pipeline_name="CustomerChurnDemo-p-ewf8t7lvhivm",  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
        base_job_prefix="CustomerChurn",  # Choose any name
):
    """Gets a SageMaker ML Pipeline instance working on CustomerChurn data.

    The pipeline preprocesses the raw data, trains an XGBoost model,
    evaluates it, and registers the model only when the accuracy read from
    the evaluation report is greater than or equal to 0.8.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When ``None``,
            it is looked up with ``sagemaker.session.get_execution_role``.
        default_bucket: the bucket to use for storing the artifacts
        model_package_group_name: model package group the model is
            registered under.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix used in the base job names and in the
            training output path.
    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # Parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(name="ProcessingInstanceType",
                                               default_value="ml.m5.xlarge")
    training_instance_type = ParameterString(name="TrainingInstanceType",
                                             default_value="ml.m5.xlarge")
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value=
        "PendingManualApproval",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=
        "s3://sm-pipelines-demo-data-123456789/churn.txt",  # Change this to point to the s3 location of your raw input data.
    )

    # Processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=
        f"{base_job_prefix}/sklearn-CustomerChurn-preprocess",  # choose any name
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process = ProcessingStep(
        name="CustomerChurnProcess",  # choose any name
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train",
                             source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation",
                             source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test",
                             source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # Training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework=
        "xgboost",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/CustomerChurn-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(
        objective="binary:logistic",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="CustomerChurnTrain",
        estimator=xgb_train,
        inputs={
            "train":
            TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.
                Outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation":
            TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.
                Outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # Processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-CustomerChurn-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="EvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="CustomerChurnEval",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.
                Outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation",
                             source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # Model metrics attached to the registered model: the evaluation.json
    # file under the evaluation step's first (and only) processing output.
    model_metrics = ModelMetrics(model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(
            step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]
            ["S3Output"]["S3Uri"]),
        content_type="application/json",
    ))

    # Register model step that will be conditionally executed
    step_register = RegisterModel(
        name="CustomerChurnRegisterModel",
        estimator=xgb_train,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # Condition step for evaluating model quality and branching execution
    cond_gte = ConditionGreaterThanOrEqualTo(  # You can change the condition here
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path=
            "binary_classification_metrics.accuracy.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
        ),
        right=0.8,  # You can change the threshold here
    )
    step_cond = ConditionStep(
        name="CustomerChurnAccuracyCond",
        conditions=[cond_gte],
        if_steps=[step_register],
        else_steps=[],
    )

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
# Example #8
# 0
def test_model_registration_with_model_repack(
    sagemaker_session,
    role,
    pipeline_name,
    region_name,
):
    """Create and start a conditional pipeline whose register step has an ``entry_point``.

    The pipeline holds one ``ConditionStep``: when ``GoodEnoughInput`` is at
    least 1 the train and register steps are the branch taken, otherwise a
    create-model step is. The test creates the pipeline, starts it once with
    default parameters and once with ``GoodEnoughInput`` set to 0, and checks
    the returned ARNs. The pipeline is deleted at the end on a best-effort
    basis.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    training_script = os.path.join(mnist_dir, "mnist.py")
    uploaded_data_uri = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    training_input = TrainingInput(s3_data=uploaded_data_uri)

    # Pipeline parameters.
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(
        name="InstanceType", default_value="ml.m5.xlarge"
    )
    good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)

    pytorch_estimator = PyTorch(
        entry_point=training_script,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
    )
    step_train = TrainingStep(
        name="pytorch-train",
        estimator=pytorch_estimator,
        inputs=training_input,
    )

    step_register = RegisterModel(
        name="pytorch-register-model",
        estimator=pytorch_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["*"],
        response_types=["*"],
        inference_instances=["*"],
        transform_instances=["*"],
        description="test-description",
        entry_point=training_script,
    )

    fallback_model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_model = CreateModelStep(
        name="pytorch-model",
        model=fallback_model,
        inputs=CreateModelInput(
            instance_type="ml.m5.large",
            accelerator_type="ml.eia1.medium",
        ),
    )

    good_enough = ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1)
    step_cond = ConditionStep(
        name="cond-good-enough",
        conditions=[good_enough],
        if_steps=[step_train, step_register],
        else_steps=[step_model],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[good_enough_input, instance_count, instance_type],
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
    )

    try:
        pipeline_arn = pipeline.create(role)["PipelineArn"]
        assert re.match(
            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            pipeline_arn,
        )

        # Start once with defaults, then once with GoodEnoughInput set to 0.
        for start_parameters in ({}, {"GoodEnoughInput": 0}):
            execution = pipeline.start(parameters=start_parameters)
            assert re.match(
                fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass
def test_register_model_sip(estimator, model_metrics):
    """Verify the request built when RegisterModel is given a two-container PipelineModel.

    The expected request holds one ``RegisterModel`` entry that depends on
    ``TestStep`` and whose inference specification lists both containers with
    their image, model data URL and environment.
    """
    first_container = Model(
        image_uri="fakeimage1", model_data="Url1", env=[{"k1": "v1"}, {"k2": "v2"}]
    )
    second_container = Model(
        image_uri="fakeimage2", model_data="Url2", env=[{"k3": "v3"}, {"k4": "v4"}]
    )
    serial_pipeline = PipelineModel([first_container, second_container], ROLE)

    step_collection = RegisterModel(
        name="RegisterModelStep",
        estimator=estimator,
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        approval_status="Approved",
        description="description",
        model=serial_pipeline,
        depends_on=["TestStep"],
    )

    # One container entry per model wrapped by the PipelineModel.
    expected_inference_spec = {
        "Containers": [
            {
                "Image": "fakeimage1",
                "ModelDataUrl": "Url1",
                "Environment": [{"k1": "v1"}, {"k2": "v2"}],
            },
            {
                "Image": "fakeimage2",
                "ModelDataUrl": "Url2",
                "Environment": [{"k3": "v3"}, {"k4": "v4"}],
            },
        ],
        "SupportedContentTypes": ["content_type"],
        "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
        "SupportedResponseMIMETypes": ["response_type"],
        "SupportedTransformInstanceTypes": ["transform_instance"],
    }
    expected_metrics = {
        "ModelQuality": {
            "Statistics": {
                "ContentType": "text/csv",
                "S3Uri": f"s3://{BUCKET}/metrics.csv",
            },
        },
    }
    expected_steps = [
        {
            "Name": "RegisterModelStep",
            "Type": "RegisterModel",
            "DependsOn": ["TestStep"],
            "Arguments": {
                "InferenceSpecification": expected_inference_spec,
                "ModelApprovalStatus": "Approved",
                "ModelMetrics": expected_metrics,
                "ModelPackageDescription": "description",
                "ModelPackageGroupName": "mpg",
            },
        },
    ]

    assert ordered(step_collection.request_dicts()) == ordered(expected_steps)
예제 #10
0
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="AbalonePackageGroup",
    pipeline_name="AbalonePipeline",
    base_job_prefix="Abalone",
):
    """Gets a SageMaker ML Pipeline instance working on the credit data.

    The pipeline preprocesses the input data, trains an XGBoost model,
    evaluates it, runs the model-monitor container over the
    ``baseline_with_headers`` output and registers the model when the
    evaluated accuracy is at least 0.7.

    Args:
        region: AWS region used to create the session and to look up the
            XGBoost and model-monitor container images.
        role: IAM role to create and run steps and pipeline. When ``None``
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts. When
            ``None`` the session's default bucket is used.
        model_package_group_name: model package group the model is
            registered in.
        pipeline_name: name given to the pipeline.
        base_job_prefix: key prefix of the baseline results inside
            ``default_bucket``.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)
    if default_bucket is None:
        # Without this fallback the S3 URIs below would read "s3://None/...".
        default_bucket = sagemaker_session.default_bucket()

    # NOTE(review): processing_instance_type, processing_instance_count,
    # training_instance_type, model_approval_status and input_data are not
    # defined in this function; presumably they are module-level pipeline
    # parameters -- confirm against the rest of the file.

    # Create cache configuration
    cache_config = CacheConfig(enable_caching=True, expire_after="T30m")

    # Create SKLearn processor object
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name="credit-processing-job",
        sagemaker_session=sagemaker_session,
    )

    # Use the sklearn_processor in a Sagemaker pipelines ProcessingStep
    step_preprocess_data = ProcessingStep(
        name="PreprocessCreditData",
        processor=sklearn_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
        ],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"),
            ProcessingOutput(
                output_name="validation", source="/opt/ml/processing/output/validation"
            ),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/output/test"),
            ProcessingOutput(
                output_name="baseline_with_headers",
                source="/opt/ml/processing/output/baseline",
            ),
        ],
        code=os.path.join(BASE_DIR, "preprocessing.py"),
    )

    # Where to store the trained model
    model_path = f"s3://{default_bucket}/CreditTrain"

    # Fetch container to use for training
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.2-2",
        py_version="py3",
        instance_type=training_instance_type,
    )

    # Create XGBoost estimator object
    xgb_estimator = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        role=role,
        disable_profiler=True,
        sagemaker_session=sagemaker_session,
    )

    # Specify hyperparameters
    xgb_estimator.set_hyperparameters(
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.8,
        objective="binary:logistic",
        num_round=25,
    )

    # Use the xgb_estimator in a Sagemaker pipelines TrainingStep.
    # NOTE how the input to the training job directly references the output of the previous step.
    preprocess_outputs = step_preprocess_data.properties.ProcessingOutputConfig.Outputs
    step_train_model = TrainingStep(
        name="TrainCreditModel",
        estimator=xgb_estimator,
        cache_config=cache_config,
        inputs={
            "train": TrainingInput(
                s3_data=preprocess_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=preprocess_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # Create ScriptProcessor object.
    evaluate_model_processor = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name="script-credit-eval",
        role=role,
        sagemaker_session=sagemaker_session,
    )

    # Create a PropertyFile
    # We use a PropertyFile to be able to reference outputs from a processing step,
    # for instance to use in a condition step, which we'll see later on.
    # For more information, visit
    # https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-propertyfile.html
    evaluation_report = PropertyFile(
        name="EvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )

    # Use the evaluate_model_processor in a Sagemaker pipelines ProcessingStep.
    step_evaluate_model = ProcessingStep(
        name="EvaluateCreditModel",
        processor=evaluate_model_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(
                source=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=preprocess_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluation.py"),
        property_files=[evaluation_report],
    )

    # The metrics file lives under the S3 URI of the evaluation step's only output.
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_evaluate_model.arguments["ProcessingOutputConfig"]["Outputs"][0][
                    "S3Output"
                ]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    # Create a RegisterModel step, which registers your model with Sagemaker Model Registry.
    step_register_model = RegisterModel(
        name="RegisterCreditModel",
        estimator=xgb_estimator,
        model_data=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge", "ml.m5.large"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # Create Processor object using the model monitor image.
    # The image is looked up for the caller's region rather than a fixed one,
    # so the pipeline also works outside eu-west-1.
    baseline_processor = sagemaker.processing.Processor(
        base_job_name="credit-risk-baseline-processor",
        image_uri=sagemaker.image_uris.retrieve(framework="model-monitor", region=region),
        role=role,
        instance_count=1,
        instance_type=processing_instance_type,
        env={
            "dataset_format": "{\"csv\": {\"header\": true} }",
            "dataset_source": "/opt/ml/processing/sm_input",
            "output_path": "/opt/ml/processing/sm_output",
            "publish_cloudwatch_metrics": "Disabled",
        },
        sagemaker_session=sagemaker_session,
    )

    # Create a Sagemaker Pipeline step, using the baseline_processor.
    step_create_data_baseline = ProcessingStep(
        name="CreateModelQualityBaseline",
        processor=baseline_processor,
        cache_config=cache_config,
        inputs=[
            ProcessingInput(
                source=preprocess_outputs["baseline_with_headers"].S3Output.S3Uri,
                destination="/opt/ml/processing/sm_input",
            )
        ],
        outputs=[
            ProcessingOutput(
                source="/opt/ml/processing/sm_output",
                destination=f"s3://{default_bucket}/{base_job_prefix}/baseline",
                output_name="baseline_result",
            )
        ],
    )

    # Create Condition
    cond_gte = ConditionGreaterThanOrEqualTo(
        left=JsonGet(
            step=step_evaluate_model,
            property_file=evaluation_report,
            json_path="binary_classification_metrics.accuracy.value",
        ),
        right=0.7,
    )

    # Create a Sagemaker Pipelines ConditionStep, using the condition we just created.
    step_cond = ConditionStep(
        name="AccuracyCondition",
        conditions=[cond_gte],
        if_steps=[step_register_model],
        else_steps=[],
    )

    from sagemaker.workflow.pipeline import Pipeline

    # Create a Sagemaker Pipeline
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[
            step_preprocess_data,
            step_train_model,
            step_evaluate_model,
            step_create_data_baseline,
            step_cond,
        ],
        sagemaker_session=sagemaker_session,
    )

    return pipeline
def get_pipeline(
    region,
    security_group_ids,
    subnets,
    processing_role=None,
    training_role=None,
    data_bucket=None,
    model_bucket=None,
    model_package_group_name="AbalonePackageGroup",
    pipeline_name="AbalonePipeline",
    base_job_prefix="Abalone",
):
    """Assemble the abalone pipeline with VPC-aware processing and training jobs.

    Args:
        region: AWS region to create and run the pipeline.
        security_group_ids: comma-separated security group ids; split on ","
            before being handed to the network configuration.
        subnets: comma-separated subnet ids; split on "," in the same way.
        processing_role: IAM role to create and run processing steps. Falls
            back to the execution role of the session when ``None``.
        training_role: IAM role to create and run training steps. Falls back
            to the execution role of the session when ``None``.
        data_bucket: bucket handed to ``get_session`` when the session is built.
        model_bucket: bucket that receives the training output. Falls back to
            the session's default bucket when ``None``.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix of the job names and of the training output path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, data_bucket)

    # Fill in whatever the caller left out from the session itself.
    if processing_role is None:
        processing_role = sagemaker.session.get_execution_role(sagemaker_session)
    if training_role is None:
        training_role = sagemaker.session.get_execution_role(sagemaker_session)
    if model_bucket is None:
        model_bucket = sagemaker_session.default_bucket()

    print(f"Creating the pipeline '{pipeline_name}':")
    print(
        f"Parameters:{region}\n{security_group_ids}\n{subnets}\n{processing_role}\n"
        f"    {training_role}\n{data_bucket}\n{model_bucket}\n{model_package_group_name}\n"
        f"    {pipeline_name}\n{base_job_prefix}"
    )

    # ---- pipeline execution parameters ------------------------------------
    param_processing_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    param_processing_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    param_training_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    param_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval"
    )
    param_input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://{sagemaker_session.default_bucket()}/datasets/abalone-dataset.csv",
    )

    # ---- networking -------------------------------------------------------
    # Encryption, network isolation and VPC settings shared by the processors.
    # The preprocessing job reads its data from S3, so network isolation has
    # to stay disabled; see
    # https://github.com/aws/amazon-sagemaker-examples/issues/1689
    network_config = NetworkConfig(
        enable_network_isolation=False,
        security_group_ids=security_group_ids.split(","),
        subnets=subnets.split(","),
        encrypt_inter_container_traffic=True,
    )

    # ---- feature engineering ----------------------------------------------
    preprocessor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=param_processing_type,
        instance_count=param_processing_count,
        base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess",
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
    )
    preprocess_step = ProcessingStep(
        name="PreprocessAbaloneData",
        processor=preprocessor,
        outputs=[
            ProcessingOutput(output_name=split, source=f"/opt/ml/processing/{split}")
            for split in ("train", "validation", "test")
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", param_input_data],
    )
    preprocess_outputs = preprocess_step.properties.ProcessingOutputConfig.Outputs

    # ---- training ---------------------------------------------------------
    training_output_path = f"s3://{model_bucket}/{base_job_prefix}/AbaloneTrain"
    xgboost_image = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=param_training_type,
    )
    trainer = Estimator(
        image_uri=xgboost_image,
        instance_type=param_training_type,
        instance_count=1,
        output_path=training_output_path,
        base_job_name=f"{base_job_prefix}/abalone-train",
        sagemaker_session=sagemaker_session,
        role=training_role,
        subnets=network_config.subnets,
        security_group_ids=network_config.security_group_ids,
        encrypt_inter_container_traffic=True,
        enable_network_isolation=False,
    )
    trainer.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    train_step = TrainingStep(
        name="TrainAbaloneModel",
        estimator=trainer,
        inputs={
            channel: TrainingInput(
                s3_data=preprocess_outputs[channel].S3Output.S3Uri,
                content_type="text/csv",
            )
            for channel in ("train", "validation")
        },
    )
    trained_model_artifacts = train_step.properties.ModelArtifacts.S3ModelArtifacts

    # ---- evaluation -------------------------------------------------------
    evaluator = ScriptProcessor(
        image_uri=xgboost_image,
        command=["python3"],
        instance_type=param_processing_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-abalone-eval",
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
    )
    evaluation_report = PropertyFile(
        name="AbaloneEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    eval_step = ProcessingStep(
        name="EvaluateAbaloneModel",
        processor=evaluator,
        inputs=[
            ProcessingInput(
                source=trained_model_artifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=preprocess_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # ---- conditional model registration -----------------------------------
    # The metrics file sits under the S3 URI of the evaluation step's only output.
    evaluation_output_uri = eval_step.arguments["ProcessingOutputConfig"]["Outputs"][0][
        "S3Output"
    ]["S3Uri"]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri=f"{evaluation_output_uri}/evaluation.json",
            content_type="application/json",
        )
    )

    # There is a bug in the RegisterModel implementation.
    # The RegisterModel step is implemented in the SDK as two steps, a
    # _RepackModelStep and a _RegisterModelStep. The _RepackModelStep runs a
    # SKLearn training step in order to repack the model.tar.gz to include any
    # custom inference code in the archive. The _RegisterModelStep then
    # registers the repacked model.
    #
    # The problem is that the _RepackModelStep does not propagate the VPC
    # configuration from the Estimator object:
    # https://github.com/aws/sagemaker-python-sdk/blob/cdb633b3ab02398c3b77f5ecd2c03cdf41049c78/src/sagemaker/workflow/_utils.py#L88
    #
    # This causes an AccessDenied exception because the repacker cannot access
    # the S3 bucket (all access which is not via the VPC endpoint is blocked
    # by the bucket policy).
    #
    # The issue is opened against the SageMaker python SDK:
    # https://github.com/aws/sagemaker-python-sdk/issues/2302
    register_step = RegisterModel(
        name="RegisterAbaloneModel",
        estimator=trainer,
        model_data=trained_model_artifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=param_approval_status,
        model_metrics=model_metrics,
        vpc_config_override={
            "Subnets": network_config.subnets,
            "SecurityGroupIds": network_config.security_group_ids,
        },
    )

    # Register only when the reported MSE is at most 6.0.
    mse_within_limit = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=eval_step,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    condition_step = ConditionStep(
        name="CheckMSEAbaloneEvaluation",
        conditions=[mse_within_limit],
        if_steps=[register_step],
        else_steps=[],
    )

    # ---- pipeline ---------------------------------------------------------
    return Pipeline(
        name=pipeline_name,
        parameters=[
            param_processing_type,
            param_processing_count,
            param_training_type,
            param_approval_status,
            param_input_data,
        ],
        steps=[preprocess_step, train_step, eval_step, condition_step],
        sagemaker_session=sagemaker_session,
    )
예제 #12
0
def get_pipeline(
    region,
    sagemaker_session,
    role=None,
    default_bucket=None,
    model_package_group_name="sts-sklearn-grp",
    pipeline_name="stsPipeline",
    base_job_prefix="sts",
) -> Pipeline:
    """Gets a SageMaker ML Pipeline instance working on sts data.

    The pipeline preprocesses the data, trains a scikit-learn model and
    evaluates it. When the reported MSE is at most 6.0 the model is
    registered and the monitoring baseline data is prepared.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_session: session used by every processor, the estimator and
            the pipeline. When ``None`` a session is created from ``region``
            and ``default_bucket``.
        role: IAM role to create and run steps and pipeline. When ``None``
            the execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts; only
            consulted when a session has to be created.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix of the job names and of the training output path.

    Returns:
        an instance of a pipeline
    """
    # Instance types allowed:
    #
    # ml.r5.12xlarge, ml.m5.4xlarge, ml.p2.xlarge, ml.m4.16xlarge, ml.r5.24xlarge,
    # ml.t3.xlarge, ml.r5.16xlarge, ml.m5.large, ml.p3.16xlarge, ml.p2.16xlarge,
    # ml.c4.2xlarge, ml.c5.2xlarge, ml.c4.4xlarge, ml.c5.4xlarge, ml.c4.8xlarge,
    # ml.c5.9xlarge, ml.c5.xlarge, ml.c4.xlarge, ml.t3.2xlarge, ml.t3.medium,
    # ml.c5.18xlarge, ml.r5.2xlarge, ml.p3.2xlarge, ml.m5.xlarge, ml.m4.10xlarge,
    # ml.r5.4xlarge, ml.m5.12xlarge, ml.m4.xlarge, ml.t3.large, ml.m5.24xlarge,
    # ml.m4.2xlarge, ml.m5.2xlarge, ml.p2.8xlarge, ml.r5.8xlarge, ml.r5.xlarge,
    # ml.r5.large, ml.p3.8xlarge, ml.m4.4xlarge
    #
    # see
    # https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/

    # Honour the session supplied by the caller; only build one when none was given.
    if sagemaker_session is None:
        sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1
    )
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )

    # as of free tier of 50 hours of m4.xlarge or m5.xlarge instances
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="Approved"
    )

    # preprocess input data
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://sts-datwit-dataset/stsmsrpc.txt",
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-sts-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_preprocess = ProcessingStep(
        name="PreprocessSTSData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(
                output_name="validation", source="/opt/ml/processing/validation"
            ),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )
    preprocess_outputs = step_preprocess.properties.ProcessingOutputConfig.Outputs

    # training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/stsTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="sklearn",
        region=region,
        version="0.23-1",
        py_version="py3",
        instance_type=training_instance_type,
    )

    sklearn_estimator = SKLearn(
        entry_point="training.py",
        source_dir=BASE_DIR,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        framework_version="0.23-1",
        py_version="py3",
        base_job_name=f"{base_job_prefix}/sts-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_train = TrainingStep(
        name="TrainSTSModel",
        estimator=sklearn_estimator,
        inputs={
            "train": TrainingInput(
                s3_data=preprocess_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=preprocess_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-sts-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="stsEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateSTSModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=preprocess_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="evaluation", source="/opt/ml/processing/evaluation"
            ),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # setup model quality monitoring baseline data
    script_process_baseline_data = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/baseline",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_process_baseline_data = ProcessingStep(
        name="SetupMonitoringData",
        processor=script_process_baseline_data,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=preprocess_outputs["validation"].S3Output.S3Uri,
                destination="/opt/ml/processing/validation",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="validate", source="/opt/ml/processing/validate"),
        ],
        code=os.path.join(BASE_DIR, "baseline.py"),
    )

    # register model step that will be conditionally executed
    # The metrics file sits under the S3 URI of the evaluation step's only output.
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
                    "S3Uri"
                ]
            ),
            content_type="application/json",
        )
    )

    step_register = RegisterModel(
        name="RegisterSTSModel",
        estimator=sklearn_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSESTSEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register, step_process_baseline_data],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_preprocess, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
예제 #13
0
def test_end_to_end_pipeline_successful_execution(
    sagemaker_session, region_name, role, pipeline_name, wait=False
):
    """Assemble the Abalone end-to-end pipeline, create it, and start one execution.

    The pipeline chains a preprocessing step, an XGBoost training step and an
    evaluation step, followed by a condition on the reported MSE that gates
    model registration, model creation and batch transform.

    Args:
        sagemaker_session: Session used for S3 access and by every job definition.
        region_name: AWS region; selects the seed-data bucket and the XGBoost image.
        role: IAM role passed to the processors, estimator, model and pipeline.
        pipeline_name: Pipeline name, also used as prefix for S3 keys and job names.
        wait: When true, ``execution.wait()`` is called before returning.

    Returns:
        The ARN of the started pipeline execution.
    """
    package_group = f"{pipeline_name}ModelPackageGroup"
    workflow_dir = os.path.join(DATA_DIR, "workflow")
    bucket_name = sagemaker_session.default_bucket()

    s3_prefix = f"s3://{bucket_name}/{pipeline_name}"
    seed_bucket = f"sagemaker-servicecatalog-seedcode-{region_name}"
    s3 = sagemaker_session.boto_session.resource("s3")

    # Pull the training dataset from the seed bucket and stage it under our own prefix.
    dataset_file = os.path.join(workflow_dir, "abalone-dataset.csv")
    s3.Bucket(seed_bucket).download_file("dataset/abalone-dataset.csv", dataset_file)
    with open(dataset_file) as handle:
        input_data_uri = S3Uploader.upload_string_as_file_body(
            body=handle.read(),
            desired_s3_uri=f"{s3_prefix}/abalone-dataset.csv",
            sagemaker_session=sagemaker_session,
        )

    # Same round trip for the batch transform dataset.
    batch_file = os.path.join(workflow_dir, "abalone-dataset-batch")
    s3.Bucket(seed_bucket).download_file("dataset/abalone-dataset-batch", batch_file)
    with open(batch_file) as handle:
        batch_data_uri = S3Uploader.upload_string_as_file_body(
            body=handle.read(),
            desired_s3_uri=f"{s3_prefix}/abalone-dataset-batch",
            sagemaker_session=sagemaker_session,
        )

    # Pipeline parameters.
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="Approved")
    input_data = ParameterString(name="InputData", default_value=input_data_uri)
    batch_data = ParameterString(name="BatchData", default_value=batch_data_uri)

    # Preprocessing: split the raw data into train / validation / test outputs.
    preprocessor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{pipeline_name}-process",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    step_process = ProcessingStep(
        name="AbaloneProcess",
        processor=preprocessor,
        inputs=[ProcessingInput(source=input_data, destination="/opt/ml/processing/input")],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(workflow_dir, "abalone/preprocessing.py"),
    )
    processed_outputs = step_process.properties.ProcessingOutputConfig.Outputs

    # Training: XGBoost regression fed by the processed train/validation channels.
    xgb_image = image_uris.retrieve(
        framework="xgboost",
        region=region_name,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=xgb_image,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=f"s3://{bucket_name}/{pipeline_name}Train",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    hyperparameters = {
        "objective": "reg:linear",
        "num_round": 50,
        "max_depth": 5,
        "eta": 0.2,
        "gamma": 4,
        "min_child_weight": 6,
        "subsample": 0.7,
        "silent": 0,
    }
    xgb_train.set_hyperparameters(**hyperparameters)
    step_train = TrainingStep(
        name="AbaloneTrain",
        estimator=xgb_train,
        inputs={
            channel: TrainingInput(
                s3_data=processed_outputs[channel].S3Output.S3Uri,
                content_type="text/csv",
            )
            for channel in ("train", "validation")
        },
    )
    trained_artifacts = step_train.properties.ModelArtifacts.S3ModelArtifacts

    # Evaluation: score the trained model on the held-out test split.
    evaluator = ScriptProcessor(
        image_uri=xgb_image,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{pipeline_name}-eval",
        role=role,
        sagemaker_session=sagemaker_session,
    )
    evaluation_report = PropertyFile(
        name="EvaluationReport", output_name="evaluation", path="evaluation.json"
    )
    step_eval = ProcessingStep(
        name="AbaloneEval",
        processor=evaluator,
        inputs=[
            ProcessingInput(source=trained_artifacts, destination="/opt/ml/processing/model"),
            ProcessingInput(
                source=processed_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(workflow_dir, "abalone/evaluation.py"),
        property_files=[evaluation_report],
    )

    # Model creation from the training artifacts.
    step_create_model = CreateModelStep(
        name="AbaloneCreateModel",
        model=Model(
            image_uri=xgb_image,
            model_data=trained_artifacts,
            sagemaker_session=sagemaker_session,
            role=role,
        ),
        inputs=CreateModelInput(
            instance_type="ml.m5.large",
            accelerator_type="ml.eia1.medium",
        ),
    )

    # Batch transform using the model created above.
    step_transform = TransformStep(
        name="AbaloneTransform",
        transformer=Transformer(
            model_name=step_create_model.properties.ModelName,
            instance_type="ml.m5.xlarge",
            instance_count=1,
            output_path=f"s3://{bucket_name}/{pipeline_name}Transform",
            sagemaker_session=sagemaker_session,
        ),
        inputs=TransformInput(data=batch_data),
    )

    # Model registration, with the evaluation report attached as model statistics.
    evaluation_s3_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
        "S3Uri"
    ]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri=f"{evaluation_s3_uri}/evaluation.json",
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="AbaloneRegisterModel",
        estimator=xgb_train,
        model_data=trained_artifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=package_group,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # Gate the downstream steps on the MSE read from the evaluation report.
    mse_within_limit = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step_name=step_eval.name,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=20.0,
    )
    step_cond = ConditionStep(
        name="AbaloneMSECond",
        conditions=[mse_within_limit],
        if_steps=[step_register, step_create_model, step_transform],
        else_steps=[],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
            batch_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )

    # Create the pipeline definition and kick off a single execution.
    pipeline.create(role)
    execution = pipeline.start()
    started_arn = execution.arn
    if wait:
        execution.wait()
    return started_arn
# Example #14
# 0
def test_model_registration_with_tuning_model(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    """Run a tuning + register-best-model pipeline and check every step succeeds.

    A PyTorch MNIST estimator is tuned over ``batch-size``; the top model from
    the tuning step is then passed to ``RegisterModel``. The test creates the
    pipeline, starts executions inside a retry loop, and expects exactly three
    executed steps, all with status ``Succeeded``. The pipeline is deleted at
    the end on a best-effort basis.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    train_script = os.path.join(mnist_dir, "mnist.py")
    training_data_uri = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=training_data_uri)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    min_batch_size = ParameterString(name="MinBatchSize", default_value="64")
    max_batch_size = ParameterString(name="MaxBatchSize", default_value="128")

    pytorch_estimator = PyTorch(
        entry_point=train_script,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    # Tune only the batch size, bounded by the two pipeline parameters above.
    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges={
            "batch-size": IntegerParameter(min_batch_size, max_batch_size),
        },
        metric_definitions=[{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}],
        max_jobs=2,
        max_parallel_jobs=2,
    )
    step_tune = TuningStep(name="my-tuning-step", tuner=tuner, inputs=inputs)

    # Register the top-ranked model produced by the tuning step.
    best_model_uri = step_tune.get_top_model_s3_uri(
        top_k=0,
        s3_bucket=sagemaker_session.default_bucket(),
    )
    step_register_best = RegisterModel(
        name="my-model-regis",
        estimator=pytorch_estimator,
        model_data=best_model_uri,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        entry_point=train_script,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
        steps=[step_tune, step_register_best],
        sagemaker_session=sagemaker_session,
    )

    pipeline_arn_pattern = rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}"
    execution_arn_pattern = (
        rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/"
    )

    try:
        create_arn = pipeline.create(role)["PipelineArn"]
        assert re.match(pipeline_arn_pattern, create_arn)

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={})
            assert re.match(execution_arn_pattern, execution.arn)

            # A waiter timeout is tolerated; the step statuses below decide the outcome.
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass

            executed = execution.list_steps()
            assert len(executed) == 3
            for executed_step in executed:
                assert executed_step["StepStatus"] == "Succeeded"
            break
    finally:
        # Best-effort cleanup: a failed delete must not mask the test result.
        try:
            pipeline.delete()
        except Exception:
            pass
# Example #15
# 0
# File: pipeline.py  Project: amanveer/FSI-1
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="sagemaker-group-insurance",
    pipeline_name="sagemaker-pipeline-insurance",
    base_job_prefix="sagemaker-featurestore-insurance",
):
    """Gets a SageMaker ML Pipeline instance working on WIP insurance data.

    The pipeline runs preprocessing, XGBoost training and evaluation, then a
    condition step on the evaluation MSE that controls model registration.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When None, the
            execution role of the SageMaker session is used.
        default_bucket: the bucket to use for storing the artifacts. When
            None, the session's default bucket is used.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name of the pipeline.
        base_job_prefix: prefix for job names and for the training output path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)
    if default_bucket is None:
        # The S3 output path below is built from the bucket name; without this
        # fallback the documented default (None) raised a TypeError.
        default_bucket = sagemaker_session.default_bucket()

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="Approved"
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-insurance-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process = ProcessingStep(
        name="PreprocessInsuranceData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=[
            "--input_dataset_1", "41214",
            "--input_dataset_2", "41215",
        ],
    )
    processed_outputs = step_process.properties.ProcessingOutputConfig.Outputs

    # training step for generating model artifacts
    # NOTE(review): training output lands under a "features" prefix — confirm
    # this is intended; kept unchanged so existing artifact locations still match.
    model_path = f"s3://{default_bucket}/{base_job_prefix}/features"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/insurance-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(objective="reg:tweedie", num_round=50)
    step_train = TrainingStep(
        name="TrainAbaloneModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=processed_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-wip-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="WipEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateWipModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=processed_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="register-insurance-model",
        estimator=xgb_train,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    # NOTE(review): registration sits in the else branch, i.e. it runs when the
    # MSE is *greater* than 6.0. This looks inverted for a quality gate —
    # confirm whether it is a deliberate WIP workaround before swapping branches.
    step_cond = ConditionStep(
        name="CheckMSEWipEvaluation",
        conditions=[cond_lte],
        if_steps=[],
        else_steps=[step_register],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def get_pipeline(region, role, default_bucket, pipeline_name,
                 model_package_group_name, base_job_prefix):
    """Gets a SageMaker ML Pipeline instance working with BERT.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts
        pipeline_name:  name of this pipeline
        model_package_group_name:  model package group
        base_job_prefix:  prefix of the job name

    Returns:
        an instance of a pipeline
    """

    sm = boto3.Session().client(service_name="sagemaker", region_name=region)

    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://{}/amazon-reviews-pds/tsv/".format(bucket),
    )

    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1)

    processing_instance_type = ParameterString(name="ProcessingInstanceType",
                                               default_value="ml.c5.2xlarge")

    max_seq_length = ParameterInteger(
        name="MaxSeqLength",
        default_value=64,
    )

    balance_dataset = ParameterString(
        name="BalanceDataset",
        default_value="True",
    )

    train_split_percentage = ParameterFloat(
        name="TrainSplitPercentage",
        default_value=0.90,
    )

    validation_split_percentage = ParameterFloat(
        name="ValidationSplitPercentage",
        default_value=0.05,
    )

    test_split_percentage = ParameterFloat(
        name="TestSplitPercentage",
        default_value=0.05,
    )

    feature_store_offline_prefix = ParameterString(
        name="FeatureStoreOfflinePrefix",
        default_value="reviews-feature-store-" + str(timestamp),
    )

    feature_group_name = ParameterString(
        name="FeatureGroupName",
        default_value="reviews-feature-group-" + str(timestamp))

    train_instance_type = ParameterString(name="TrainInstanceType",
                                          default_value="ml.c5.9xlarge")

    train_instance_count = ParameterInteger(name="TrainInstanceCount",
                                            default_value=1)

    #########################
    # PROCESSING STEP
    #########################

    processor = SKLearnProcessor(
        framework_version="0.23-1",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        env={"AWS_DEFAULT_REGION": region},
        max_runtime_in_seconds=7200,
    )

    processing_inputs = [
        ProcessingInput(
            input_name="raw-input-data",
            source=input_data,
            destination="/opt/ml/processing/input/data/",
            s3_data_distribution_type="ShardedByS3Key",
        )
    ]

    processing_outputs = [
        ProcessingOutput(
            output_name="bert-train",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/train",
        ),
        ProcessingOutput(
            output_name="bert-validation",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/validation",
        ),
        ProcessingOutput(
            output_name="bert-test",
            s3_upload_mode="EndOfJob",
            source="/opt/ml/processing/output/bert/test",
        ),
    ]

    # TODO:  Figure out why the Parameters are not resolving properly to their native type when used here.
    #        We shouldn't be using `default_value`.
    processing_step = ProcessingStep(
        name="Processing",
        processor=processor,
        inputs=processing_inputs,
        outputs=processing_outputs,
        job_arguments=[
            "--train-split-percentage",
            str(train_split_percentage.default_value),
            "--validation-split-percentage",
            str(validation_split_percentage.default_value),
            "--test-split-percentage",
            str(test_split_percentage.default_value),
            "--max-seq-length",
            str(max_seq_length.default_value),
            "--balance-dataset",
            str(balance_dataset.default_value),
            "--feature-store-offline-prefix",
            str(feature_store_offline_prefix.default_value),
            "--feature-group-name",
            str(feature_group_name.default_value),
        ],
        code=os.path.join(BASE_DIR,
                          "preprocess-scikit-text-to-bert-feature-store.py"),
    )

    #########################
    # TRAINING STEP
    #########################

    epochs = ParameterInteger(name="Epochs", default_value=1)

    learning_rate = ParameterFloat(name="LearningRate", default_value=0.00001)

    epsilon = ParameterFloat(name="Epsilon", default_value=0.00000001)

    train_batch_size = ParameterInteger(name="TrainBatchSize",
                                        default_value=128)

    validation_batch_size = ParameterInteger(name="ValidationBatchSize",
                                             default_value=128)

    test_batch_size = ParameterInteger(name="TestBatchSize", default_value=128)

    train_steps_per_epoch = ParameterInteger(name="TrainStepsPerEpoch",
                                             default_value=50)

    validation_steps = ParameterInteger(name="ValidationSteps",
                                        default_value=50)

    test_steps = ParameterInteger(name="TestSteps", default_value=50)

    train_volume_size = ParameterInteger(name="TrainVolumeSize",
                                         default_value=1024)

    use_xla = ParameterString(
        name="UseXLA",
        default_value="True",
    )

    use_amp = ParameterString(
        name="UseAMP",
        default_value="True",
    )

    freeze_bert_layer = ParameterString(
        name="FreezeBERTLayer",
        default_value="False",
    )

    enable_sagemaker_debugger = ParameterString(
        name="EnableSageMakerDebugger",
        default_value="False",
    )

    enable_checkpointing = ParameterString(
        name="EnableCheckpointing",
        default_value="False",
    )

    enable_tensorboard = ParameterString(
        name="EnableTensorboard",
        default_value="False",
    )

    input_mode = ParameterString(
        name="InputMode",
        default_value="File",
    )

    run_validation = ParameterString(
        name="RunValidation",
        default_value="True",
    )

    run_test = ParameterString(
        name="RunTest",
        default_value="False",
    )

    run_sample_predictions = ParameterString(
        name="RunSamplePredictions",
        default_value="False",
    )

    metrics_definitions = [
        {
            "Name": "train:loss",
            "Regex": "loss: ([0-9\\.]+)"
        },
        {
            "Name": "train:accuracy",
            "Regex": "accuracy: ([0-9\\.]+)"
        },
        {
            "Name": "validation:loss",
            "Regex": "val_loss: ([0-9\\.]+)"
        },
        {
            "Name": "validation:accuracy",
            "Regex": "val_accuracy: ([0-9\\.]+)"
        },
    ]

    train_src = os.path.join(BASE_DIR, "src")
    model_path = f"s3://{default_bucket}/{base_job_prefix}/output/model"

    estimator = TensorFlow(
        entry_point="tf_bert_reviews.py",
        source_dir=BASE_DIR,
        role=role,
        output_path=model_path,
        instance_count=train_instance_count,
        instance_type=train_instance_type,
        volume_size=train_volume_size,
        py_version="py37",
        framework_version="2.3.1",
        hyperparameters={
            "epochs": epochs,
            "learning_rate": learning_rate,
            "epsilon": epsilon,
            "train_batch_size": train_batch_size,
            "validation_batch_size": validation_batch_size,
            "test_batch_size": test_batch_size,
            "train_steps_per_epoch": train_steps_per_epoch,
            "validation_steps": validation_steps,
            "test_steps": test_steps,
            "use_xla": use_xla,
            "use_amp": use_amp,
            "max_seq_length": max_seq_length,
            "freeze_bert_layer": freeze_bert_layer,
            "enable_sagemaker_debugger": enable_sagemaker_debugger,
            "enable_checkpointing": enable_checkpointing,
            "enable_tensorboard": enable_tensorboard,
            "run_validation": run_validation,
            "run_test": run_test,
            "run_sample_predictions": run_sample_predictions,
        },
        input_mode=input_mode,
        metric_definitions=metrics_definitions,
        #        max_run=7200 # max 2 hours * 60 minutes per hour * 60 seconds per minute
    )

    training_step = TrainingStep(
        name="Train",
        estimator=estimator,
        inputs={
            "train":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["bert-train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["bert-validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "test":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["bert-test"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    #########################
    # EVALUATION STEP
    #########################

    evaluation_processor = SKLearnProcessor(
        framework_version="0.23-1",
        role=role,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        env={"AWS_DEFAULT_REGION": region},
        max_runtime_in_seconds=7200,
    )

    evaluation_report = PropertyFile(name="EvaluationReport",
                                     output_name="metrics",
                                     path="evaluation.json")

    evaluation_step = ProcessingStep(
        name="EvaluateModel",
        processor=evaluation_processor,
        code=os.path.join(BASE_DIR, "evaluate_model_metrics.py"),
        inputs=[
            ProcessingInput(
                source=training_step.properties.ModelArtifacts.
                S3ModelArtifacts,
                destination="/opt/ml/processing/input/model",
            ),
            ProcessingInput(
                source=processing_step.properties.
                ProcessingInputs["raw-input-data"].S3Input.S3Uri,
                destination="/opt/ml/processing/input/data",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="metrics",
                             s3_upload_mode="EndOfJob",
                             source="/opt/ml/processing/output/metrics/"),
        ],
        job_arguments=[
            "--max-seq-length",
            str(max_seq_length.default_value),
        ],
        property_files=[evaluation_report
                        ],  # these cause deserialization issues
    )

    model_metrics = ModelMetrics(model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(
            evaluation_step.arguments["ProcessingOutputConfig"]["Outputs"][0]
            ["S3Output"]["S3Uri"]),
        content_type="application/json",
    ))

    #########################
    ## REGISTER TRAINED MODEL STEP
    #########################

    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval")

    deploy_instance_type = ParameterString(name="DeployInstanceType",
                                           default_value="ml.m5.4xlarge")

    deploy_instance_count = ParameterInteger(name="DeployInstanceCount",
                                             default_value=1)

    inference_image_uri = sagemaker.image_uris.retrieve(
        framework="tensorflow",
        region=region,
        version="2.3.1",
        py_version="py37",
        instance_type=deploy_instance_type,
        image_scope="inference",
    )
    print(inference_image_uri)

    register_step = RegisterModel(
        name="RegisterModel",
        estimator=estimator,
        image_uri=
        inference_image_uri,  # we have to specify, by default it's using training image
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=[
            deploy_instance_type
        ],  # The JSON spec must be within these instance types or we will see "Instance Type Not Allowed" Exception
        transform_instances=[deploy_instance_type],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
    )

    #########################
    ## CREATE MODEL FOR DEPLOYMENT STEP
    #########################

    model = Model(
        image_uri=inference_image_uri,
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sess,
        role=role,
    )

    create_inputs = CreateModelInput(instance_type=deploy_instance_type, )

    create_step = CreateModelStep(
        name="CreateModel",
        model=model,
        inputs=create_inputs,
    )

    #########################
    ## CONDITION STEP:  EVALUATE THE MODEL
    #########################

    min_accuracy_value = ParameterFloat(name="MinAccuracyValue",
                                        default_value=0.01)

    minimum_accuracy_condition = ConditionGreaterThanOrEqualTo(
        left=JsonGet(
            step=evaluation_step,
            property_file=evaluation_report,
            json_path="metrics.accuracy.value",
        ),
        right=min_accuracy_value,  # accuracy
    )

    minimum_accuracy_condition_step = ConditionStep(
        name="AccuracyCondition",
        conditions=[minimum_accuracy_condition],
        if_steps=[register_step,
                  create_step],  # success, continue with model registration
        else_steps=[],  # fail, end the pipeline
    )

    #########################
    ## CREATE PIPELINE
    #########################

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            input_data,
            processing_instance_count,
            processing_instance_type,
            max_seq_length,
            balance_dataset,
            train_split_percentage,
            validation_split_percentage,
            test_split_percentage,
            feature_store_offline_prefix,
            feature_group_name,
            train_instance_type,
            train_instance_count,
            epochs,
            learning_rate,
            epsilon,
            train_batch_size,
            validation_batch_size,
            test_batch_size,
            train_steps_per_epoch,
            validation_steps,
            test_steps,
            train_volume_size,
            use_xla,
            use_amp,
            freeze_bert_layer,
            enable_sagemaker_debugger,
            enable_checkpointing,
            enable_tensorboard,
            input_mode,
            run_validation,
            run_test,
            run_sample_predictions,
            min_accuracy_value,
            model_approval_status,
            deploy_instance_type,
            deploy_instance_count,
        ],
        steps=[
            processing_step, training_step, evaluation_step,
            minimum_accuracy_condition_step
        ],
        sagemaker_session=sess,
    )

    #########################
    ## RETURN PIPELINE
    #########################

    return pipeline
예제 #17
0
                 property_file=evaluation_report,
                 json_path="regression_metrics.mAP.value"),
    right=conf.model_approval_map_threshold)

# S3 prefix of the first output declared on ``step_eval``; the evaluation
# report is addressed as ``evaluation.json`` directly underneath it.
_evaluation_output_uri = step_eval.arguments["ProcessingOutputConfig"][
    "Outputs"][0]["S3Output"]["S3Uri"]

# Model statistics attached to the registered model package.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri="{}/evaluation.json".format(_evaluation_output_uri),
        content_type="application/json",
    )
)

# Registers the artifacts produced by ``step_train`` into the model package
# group named in ``conf``, with the approval status taken from ``conf``.
step_register = RegisterModel(
    name="BittiRegisterModel",
    estimator=tf_train,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["application/octet-stream"],
    response_types=["application/octet-stream"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name=conf.model_package_group_name,
    approval_status=conf.model_approval_status,
    model_metrics=model_metrics,
)

# Single-instance python3 script processor used by the publish stage.
script_publish = ScriptProcessor(
    image_uri=str(conf.processing_turicreate_uri),
    command=["python3"],
    instance_type=conf.processing_instance_type,
    instance_count=1,
    base_job_name="script-bitti-publish",
    role=conf.role,
)

# model card generator
# absolute wild west of a pipeline step
예제 #18
0
def get_pipeline(
    region,
    sagemaker_project_arn=None,
    role=None,
    default_bucket=None,
    model_package_group_name="restatePackageGroup",  # Choose any name
    pipeline_name="restate-p-XXXXXXXXX",  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
    base_job_prefix="restate",  # Choose any name
):
    """Build the SageMaker pipeline that tunes, evaluates and registers the "restate" models.

    The pipeline is made of these steps, in order:

    1. a Data Wrangler processing job that runs the exported flow on an
       Athena query result,
    2. a scikit-learn preprocessing job producing train/validation/test data,
    3. one hyperparameter tuning step for an XGBoost estimator and one for a
       custom decision-tree estimator,
    4. one evaluation job per tuner, run on the top model of that tuner,
    5. a condition step that registers the decision-tree model when its
       ``regression_metrics.r2s.value`` is greater than or equal to the
       XGBoost one, and registers the XGBoost model otherwise.

    Side effects: the local flow file is read and uploaded to the session's
    default bucket through ``boto3``, and ``describe_image_version`` is called
    on the SageMaker client to resolve the decision-tree image.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_project_arn: Accepted for interface compatibility; not used
            by this function.
        role: IAM role to create and run steps and pipeline. When ``None`` it
            is looked up with ``sagemaker.session.get_execution_role``.
        default_bucket: the bucket to use for storing the artifacts; passed on
            to ``get_session``.
        model_package_group_name: model package group both register steps
            write to.
        pipeline_name: name given to the pipeline (also the prefix of the
            experiment name).
        base_job_prefix: prefix of the base job names and of the S3 location
            holding the training output.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # Parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.2xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="PendingManualApproval",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="",  # Change this to point to the s3 location of your raw input data.
    )

    # You can configure this with your own bucket name, e.g.
    # bucket = "my-bucket"
    bucket = sagemaker_session.default_bucket()

    data_sources = [
        ProcessingInput(
            input_name="restate-california",
            dataset_definition=DatasetDefinition(
                local_path="/opt/ml/processing/restate-california",
                data_distribution_type="FullyReplicated",
                # You can override below to point to other database or use different queries
                athena_dataset_definition=AthenaDatasetDefinition(
                    catalog="AwsDataCatalog",
                    database="restate",
                    query_string="SELECT * FROM restate.california_10",
                    output_s3_uri=f"s3://{bucket}/athena/",
                    output_format="PARQUET",
                ),
            ),
        )
    ]

    print(f"Data Wrangler export storage bucket: {bucket}")

    # unique flow export ID
    flow_export_id = f"{time.strftime('%d-%H-%M-%S', time.gmtime())}-{str(uuid.uuid4())[:8]}"
    flow_export_name = f"flow-{flow_export_id}"

    # Output name is auto-generated from the select node's ID + output name from the flow file.
    output_name = "99ae1ec3-dd5f-453c-bfae-721dac423cd7.default"

    s3_output_prefix = f"export-{flow_export_name}/output"
    s3_output_path = f"s3://{bucket}/{s3_output_prefix}"
    print(f"Flow S3 export result path: {s3_output_path}")

    processing_job_output = ProcessingOutput(
        output_name=output_name,
        source="/opt/ml/processing/output",
        destination=s3_output_path,
        s3_upload_mode="EndOfJob",
    )

    # name of the flow file which should exist in the current notebook working directory
    flow_file_name = "sagemaker-pipeline/restate-athena-california.flow"

    # Parse the flow file before uploading it: a missing or malformed file
    # raises here. The parsed content itself is not used any further.
    with open(flow_file_name) as f:
        json.load(f)

    # Upload flow to S3
    s3_client = boto3.client("s3")
    s3_client.upload_file(
        flow_file_name,
        bucket,
        f"data_wrangler_flows/{flow_export_name}.flow",
        ExtraArgs={"ServerSideEncryption": "aws:kms"},
    )

    flow_s3_uri = f"s3://{bucket}/data_wrangler_flows/{flow_export_name}.flow"

    print(f"Data Wrangler flow {flow_file_name} uploaded to {flow_s3_uri}")

    # Input - the flow file uploaded above
    flow_input = ProcessingInput(
        source=flow_s3_uri,
        destination="/opt/ml/processing/flow",
        input_name="flow",
        s3_data_type="S3Prefix",
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
    )

    # Data Wrangler container image URI.
    container_uri = sagemaker.image_uris.retrieve(
        framework="data-wrangler",
        region=region,
    )

    # Processing Job Instance count and instance type.
    instance_count = 2
    instance_type = "ml.m5.4xlarge"

    # Size in GB of the EBS volume to use for storing data during processing
    volume_size_in_gb = 30

    # Content type for each output. Data Wrangler supports CSV as default and Parquet.
    output_content_type = "CSV"

    # Network Isolation mode; default is off
    enable_network_isolation = False

    # List of tags to be passed to the processing job
    user_tags = []

    # Output configuration used as processing job container arguments
    output_config = {output_name: {"content_type": output_content_type}}

    # KMS key for per object encryption; default is None
    kms_key = None

    processor = Processor(
        role=role,
        image_uri=container_uri,
        instance_count=instance_count,
        instance_type=instance_type,
        volume_size_in_gb=volume_size_in_gb,
        network_config=NetworkConfig(enable_network_isolation=enable_network_isolation),
        sagemaker_session=sagemaker_session,
        output_kms_key=kms_key,
        tags=user_tags,
    )

    data_wrangler_step = ProcessingStep(
        name="DataWranglerProcess",
        processor=processor,
        inputs=[flow_input] + data_sources,
        outputs=[processing_job_output],
        job_arguments=[f"--output-config '{json.dumps(output_config)}'"],
    )

    # Processing step for feature engineering
    # this processor does not have awswrangler installed
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-restate-preprocess",  # choose any name
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_process = ProcessingStep(
        name="Preprocess",  # choose any name
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(
                source=data_wrangler_step.properties.ProcessingOutputConfig.Outputs[
                    output_name
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/data/raw-data-dir",
            )
        ],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=[
            "--input-data",
            data_wrangler_step.properties.ProcessingOutputConfig.Outputs[
                output_name
            ].S3Output.S3Uri,
        ],
    )

    # Training step for generating model artifacts
    model_path = f"s3://{bucket}/{base_job_prefix}/restateTrain"
    # Same location without the "s3://" scheme; handed to get_top_model_s3_uri below.
    model_bucket_key = f"{bucket}/{base_job_prefix}/restateTrain"
    cache_config = CacheConfig(enable_caching=True, expire_after="30d")

    xgb_image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=xgb_image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/restate-xgb-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    # Only the static hyperparameters are set here; max_depth is explored by the tuner.
    xgb_train.set_hyperparameters(num_round=50)
    xgb_train.set_hyperparameters(grow_policy="lossguide")

    xgb_objective_metric_name = "validation:mse"
    xgb_hyperparameter_ranges = {
        "max_depth": IntegerParameter(2, 10, scaling_type="Linear"),
    }

    xgb_tuner_log = HyperparameterTuner(
        xgb_train,
        xgb_objective_metric_name,
        xgb_hyperparameter_ranges,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    xgb_step_tuning = TuningStep(
        name="XGBHPTune",
        tuner=xgb_tuner_log,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    # The decision-tree image is resolved from the SageMaker image named
    # "restate-dtree" rather than from a hard-coded ECR URI.
    dtree_image_uri = sagemaker_session.sagemaker_client.describe_image_version(
        ImageName="restate-dtree"
    )["ContainerImage"]

    dtree_train = Estimator(
        image_uri=dtree_image_uri,
        role=role,
        instance_count=1,
        instance_type=training_instance_type,
        base_job_name=f"{base_job_prefix}/restate-dtree-train",
        output_path=model_path,
        sagemaker_session=sagemaker_session,
    )

    dtree_objective_metric_name = "validation:mse"
    # Raw string: "\S" is a regex class, not a Python string escape.
    dtree_metric_definitions = [{"Name": "validation:mse", "Regex": r"mse:(\S+)"}]

    dtree_hyperparameter_ranges = {
        "max_depth": IntegerParameter(10, 50, scaling_type="Linear"),
        "max_leaf_nodes": IntegerParameter(2, 12, scaling_type="Linear"),
    }

    dtree_tuner_log = HyperparameterTuner(
        dtree_train,
        dtree_objective_metric_name,
        dtree_hyperparameter_ranges,
        dtree_metric_definitions,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    dtree_step_tuning = TuningStep(
        name="DTreeHPTune",
        tuner=dtree_tuner_log,
        inputs={
            "training": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    dtree_script_eval = ScriptProcessor(
        image_uri=dtree_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-dtree-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    dtree_evaluation_report = PropertyFile(
        name="EvaluationReportDTree",
        output_name="dtree_evaluation",
        path="dtree_evaluation.json",
    )

    dtree_step_eval = ProcessingStep(
        name="DTreeEval",
        processor=dtree_script_eval,
        inputs=[
            ProcessingInput(
                source=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="dtree_evaluation", source="/opt/ml/processing/evaluation"
            ),
        ],
        code=os.path.join(BASE_DIR, "dtree_evaluate.py"),
        property_files=[dtree_evaluation_report],
    )

    xgb_script_eval = ScriptProcessor(
        image_uri=xgb_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-xgb-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    xgb_evaluation_report = PropertyFile(
        name="EvaluationReportXGBoost",
        output_name="xgb_evaluation",
        path="xgb_evaluation.json",
    )

    xgb_step_eval = ProcessingStep(
        name="XGBEval",
        processor=xgb_script_eval,
        inputs=[
            ProcessingInput(
                source=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="xgb_evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "xgb_evaluate.py"),
        property_files=[xgb_evaluation_report],
    )

    xgb_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/xgb_evaluation.json".format(
                xgb_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    dtree_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/dtree_evaluation.json".format(
                dtree_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
                    "S3Uri"
                ]
            ),
            content_type="application/json",
        )
    )

    # Metric lookups compared by the condition step below.
    xgb_eval_metrics = JsonGet(
        step=xgb_step_eval,
        property_file=xgb_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )

    dtree_eval_metrics = JsonGet(
        step=dtree_step_eval,
        property_file=dtree_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )

    # Register model step that will be conditionally executed
    dtree_step_register = RegisterModel(
        name="DTreeReg",
        estimator=dtree_train,
        model_data=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=dtree_model_metrics,
    )

    # Register model step that will be conditionally executed
    xgb_step_register = RegisterModel(
        name="XGBReg",
        estimator=xgb_train,
        model_data=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=xgb_model_metrics,
    )

    # Condition step for evaluating model quality and branching execution:
    # true when the decision-tree metric >= the XGBoost metric.
    cond_gte = ConditionGreaterThanOrEqualTo(  # You can change the condition here
        left=dtree_eval_metrics,
        right=xgb_eval_metrics,
    )

    step_cond = ConditionStep(
        name="AccuracyCond",
        conditions=[cond_gte],
        if_steps=[dtree_step_register],
        else_steps=[xgb_step_register],
    )
    create_date = time.strftime("%Y-%m-%d-%H-%M-%S")

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        pipeline_experiment_config=PipelineExperimentConfig(
            pipeline_name + "-" + create_date, "restate-{}".format(create_date)
        ),
        steps=[
            data_wrangler_step,
            step_process,
            dtree_step_tuning,
            xgb_step_tuning,
            dtree_step_eval,
            xgb_step_eval,
            step_cond,
        ],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
def test_register_model_with_model_repack(estimator, model_metrics):
    """Verify the two request dicts emitted when RegisterModel gets an ``entry_point``.

    The collection must contain exactly two steps: a ``Training`` step named
    ``RegisterModelStepRepackModel`` that carries the ``depends_on`` value,
    and the ``RegisterModel`` step itself, which must not carry ``DependsOn``
    and whose container ``ModelDataUrl`` is a ``Properties`` reference.
    """
    register_model = RegisterModel(
        name="RegisterModelStep",
        estimator=estimator,
        model_data=f"s3://{BUCKET}/model.tar.gz",
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        approval_status="Approved",
        description="description",
        entry_point=f"{DATA_DIR}/dummy_script.py",
        depends_on=["TestStep"],
    )

    steps = register_model.request_dicts()
    assert len(steps) == 2

    for step in steps:
        step_type = step["Type"]
        if step_type not in ("Training", "RegisterModel"):
            raise Exception("A step exists in the collection of an invalid type.")

        if step_type == "Training":
            # Repack step: inherits the user supplied dependency.
            assert step["Name"] == "RegisterModelStepRepackModel"
            assert len(step["DependsOn"]) == 1
            assert step["DependsOn"][0] == "TestStep"

            step_args = step["Arguments"]
            # The job name is generated, so it is read back from the request
            # and reused (unquoted) to build the expected submit directory.
            repacker_job_name = step_args["HyperParameters"]["sagemaker_job_name"]
            unquoted_job_name = repacker_job_name.replace('"', "")
            expected_hyperparameters = {
                "inference_script": '"dummy_script.py"',
                "model_archive": '"model.tar.gz"',
                "sagemaker_submit_directory": '"s3://{}/{}/source/sourcedir.tar.gz"'.format(
                    BUCKET, unquoted_job_name
                ),
                "sagemaker_program": '"_repack_model.py"',
                "sagemaker_container_log_level": "20",
                "sagemaker_job_name": repacker_job_name,
                "sagemaker_region": f'"{REGION}"',
            }
            expected_training_args = {
                "AlgorithmSpecification": {
                    "TrainingImage": MODEL_REPACKING_IMAGE_URI,
                    "TrainingInputMode": "File",
                },
                "DebugHookConfig": {
                    "CollectionConfigurations": [],
                    "S3OutputPath": f"s3://{BUCKET}/",
                },
                "HyperParameters": expected_hyperparameters,
                "InputDataConfig": [
                    {
                        "ChannelName": "training",
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataDistributionType": "FullyReplicated",
                                "S3DataType": "S3Prefix",
                                "S3Uri": f"s3://{BUCKET}",
                            }
                        },
                    }
                ],
                "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.m5.large",
                    "VolumeSizeInGB": 30,
                },
                "RoleArn": ROLE,
                "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
            }
            assert ordered(step_args) == ordered(expected_training_args)
            continue

        # RegisterModel step: the dependency lives on the repack step only.
        assert step["Name"] == "RegisterModelStep"
        assert "DependsOn" not in step

        step_args = step["Arguments"]
        inference_spec = step_args["InferenceSpecification"]
        containers = inference_spec["Containers"]
        assert len(containers) == 1
        assert containers[0]["Image"] == estimator.training_image_uri()
        assert isinstance(containers[0]["ModelDataUrl"], Properties)

        # The containers were checked piecewise above; drop them so the rest
        # of the arguments can be compared against a literal.
        del inference_spec["Containers"]
        expected_register_args = {
            "InferenceSpecification": {
                "SupportedContentTypes": ["content_type"],
                "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
                "SupportedResponseMIMETypes": ["response_type"],
                "SupportedTransformInstanceTypes": ["transform_instance"],
            },
            "ModelApprovalStatus": "Approved",
            "ModelMetrics": {
                "ModelQuality": {
                    "Statistics": {
                        "ContentType": "text/csv",
                        "S3Uri": f"s3://{BUCKET}/metrics.csv",
                    },
                },
            },
            "ModelPackageDescription": "description",
            "ModelPackageGroupName": "mpg",
        }
        assert ordered(step_args) == ordered(expected_register_args)
def test_register_model_with_model_repack_with_pipeline_model(
    pipeline_model, model_metrics, drift_check_baselines
):
    """Verify the request dicts emitted when RegisterModel is given a pipeline model.

    Two steps are expected: a ``Training`` repack step named
    ``modelNameRepackModel`` that carries the ``depends_on`` value, the tags
    and a ``VpcConfig``, and the ``RegisterModel`` step carrying the metrics,
    drift-check baselines and tags but no ``DependsOn``.
    """
    retry_on_service_fault = StepRetryPolicy(
        exception_types=[StepExceptionTypeEnum.SERVICE_FAULT], max_attempts=10
    )
    register_model = RegisterModel(
        name="RegisterModelStep",
        model=pipeline_model,
        model_data=f"s3://{BUCKET}/model.tar.gz",
        content_types=["content_type"],
        response_types=["response_type"],
        inference_instances=["inference_instance"],
        transform_instances=["transform_instance"],
        model_package_group_name="mpg",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        approval_status="Approved",
        description="description",
        depends_on=["TestStep"],
        repack_model_step_retry_policies=[retry_on_service_fault],
        register_model_step_retry_policies=[retry_on_service_fault],
        tags=[{"Key": "myKey", "Value": "myValue"}],
    )

    steps = register_model.request_dicts()
    assert len(steps) == 2

    for step in steps:
        step_type = step["Type"]
        if step_type not in ("Training", "RegisterModel"):
            raise Exception("A step exists in the collection of an invalid type.")

        if step_type == "Training":
            # Repack step: inherits the user supplied dependency.
            assert step["Name"] == "modelNameRepackModel"
            assert len(step["DependsOn"]) == 1
            assert step["DependsOn"][0] == "TestStep"

            step_args = step["Arguments"]
            # The job name is generated, so it is read back from the request
            # and reused (unquoted) to build the expected submit directory.
            repacker_job_name = step_args["HyperParameters"]["sagemaker_job_name"]
            unquoted_job_name = repacker_job_name.replace('"', "")
            expected_hyperparameters = {
                "dependencies": "null",
                "inference_script": '"dummy_script.py"',
                "model_archive": '"model.tar.gz"',
                "sagemaker_submit_directory": '"s3://{}/{}/source/sourcedir.tar.gz"'.format(
                    BUCKET, unquoted_job_name
                ),
                "sagemaker_program": '"_repack_model.py"',
                "sagemaker_container_log_level": "20",
                "sagemaker_job_name": repacker_job_name,
                "sagemaker_region": f'"{REGION}"',
                "source_dir": "null",
            }
            expected_training_args = {
                "AlgorithmSpecification": {
                    "TrainingImage": MODEL_REPACKING_IMAGE_URI,
                    "TrainingInputMode": "File",
                },
                "DebugHookConfig": {
                    "CollectionConfigurations": [],
                    "S3OutputPath": f"s3://{BUCKET}/",
                },
                "HyperParameters": expected_hyperparameters,
                "InputDataConfig": [
                    {
                        "ChannelName": "training",
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataDistributionType": "FullyReplicated",
                                "S3DataType": "S3Prefix",
                                "S3Uri": f"s3://{BUCKET}",
                            }
                        },
                    }
                ],
                "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.m5.large",
                    "VolumeSizeInGB": 30,
                },
                "RoleArn": ROLE,
                "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
                "Tags": [{"Key": "myKey", "Value": "myValue"}],
                "VpcConfig": [
                    ("SecurityGroupIds", ["123", "456"]),
                    ("Subnets", ["abc", "def"]),
                ],
            }
            assert ordered(step_args) == ordered(expected_training_args)
            continue

        # RegisterModel step: the dependency lives on the repack step only.
        assert step["Name"] == "RegisterModelStep"
        assert "DependsOn" not in step

        step_args = step["Arguments"]
        inference_spec = step_args["InferenceSpecification"]
        containers = inference_spec["Containers"]
        assert len(containers) == 1
        assert containers[0]["Image"] == pipeline_model.models[0].image_uri
        assert isinstance(containers[0]["ModelDataUrl"], Properties)

        # The containers were checked piecewise above; drop them so the rest
        # of the arguments can be compared against a literal.
        del inference_spec["Containers"]
        expected_register_args = {
            "InferenceSpecification": {
                "SupportedContentTypes": ["content_type"],
                "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
                "SupportedResponseMIMETypes": ["response_type"],
                "SupportedTransformInstanceTypes": ["transform_instance"],
            },
            "ModelApprovalStatus": "Approved",
            "ModelMetrics": {
                "Bias": {},
                "Explainability": {},
                "ModelQuality": {
                    "Statistics": {
                        "ContentType": "text/csv",
                        "S3Uri": f"s3://{BUCKET}/metrics.csv",
                    },
                },
            },
            "DriftCheckBaselines": {
                "ModelQuality": {
                    "Constraints": {
                        "ContentType": "text/csv",
                        "S3Uri": f"s3://{BUCKET}/constraints_metrics.csv",
                    }
                }
            },
            "ModelPackageDescription": "description",
            "ModelPackageGroupName": "mpg",
            "Tags": [{"Key": "myKey", "Value": "myValue"}],
        }
        assert ordered(step_args) == ordered(expected_register_args)
def test_mxnet_model_registration(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    """Register an MXNet model through a single-step pipeline and run it.

    Builds an ``MXNetModel`` from the ``mxnet_mnist`` data under ``DATA_DIR``,
    wraps it in a ``RegisterModel`` step, creates the pipeline and starts it
    twice: once with an explicit empty ``parameters`` dict and once with no
    arguments. The pipeline ARN and both execution ARNs are matched against
    the expected patterns. The pipeline is deleted afterwards on a
    best-effort basis.

    Args:
        sagemaker_session: Session passed to the model and the pipeline.
        role: Role passed to the model and to ``pipeline.create``.
        cpu_instance_type: Not referenced in the body.
        pipeline_name: Name given to the pipeline; also expected in the ARNs.
        region_name: Region expected in the ARNs.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    code_dir = os.path.join(mnist_dir, "code")

    # Declared as pipeline parameters only; no step below consumes them.
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(
        name="InstanceType",
        default_value="ml.m5.xlarge",
    )

    mxnet_model = MXNetModel(
        entry_point=os.path.join(code_dir, "inference.py"),
        source_dir=code_dir,
        role=role,
        model_data=os.path.join(mnist_dir, "model.tar.gz"),
        framework_version="1.7.0",
        py_version="py3",
        sagemaker_session=sagemaker_session,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type],
        steps=[
            RegisterModel(
                name="mxnet-register-model",
                model=mxnet_model,
                content_types=["*"],
                response_types=["*"],
                inference_instances=["ml.m5.xlarge"],
                transform_instances=["*"],
                description="test-description",
            )
        ],
        sagemaker_session=sagemaker_session,
    )

    pipeline_arn_pattern = (
        rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}"
    )
    execution_arn_pattern = (
        rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/"
    )

    try:
        created = pipeline.create(role)
        assert re.match(pipeline_arn_pattern, created["PipelineArn"])

        # The first start passes an explicit empty parameter dict, the second
        # passes no arguments at all.
        for start_kwargs in ({"parameters": {}}, {}):
            execution = pipeline.start(**start_kwargs)
            assert re.match(execution_arn_pattern, execution.arn)
    finally:
        try:
            pipeline.delete()
        except Exception:
            # Cleanup is best-effort: errors raised by delete are ignored.
            pass
def test_sklearn_xgboost_sip_model_registration(sagemaker_session, role,
                                                pipeline_name, region_name):
    """Run a process/train/register pipeline for a two-container model.

    An SKLearn processing step produces train data, validation data and a
    model output; an XGBoost training step consumes the two data outputs; the
    resulting XGBoost and SKLearn models are combined (in that order) in a
    ``PipelineModel`` that a ``RegisterModel`` step registers. The pipeline is
    upserted, started twice (with an empty ``parameters`` dict and with no
    arguments), the returned ARNs are matched against the expected patterns,
    and the pipeline is finally deleted on a best-effort basis.

    Args:
        sagemaker_session: Session passed to the processor, estimator, models
            and pipeline; its default bucket hosts every S3 path used here.
        role: Role passed to the processor, estimator, models and
            ``pipeline.upsert``.
        pipeline_name: Name of the pipeline; also expected in the ARNs.
        region_name: Region expected in the ARNs.
    """
    prefix = "sip"
    bucket_name = sagemaker_session.default_bucket()
    sip_dir = os.path.join(DATA_DIR, "sip")

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(
        name="InstanceType",
        default_value="ml.m5.xlarge",
    )

    # S3 locations, each exposed as an overridable pipeline parameter.
    raw_data_path_param = ParameterString(
        name="raw_data_path",
        default_value="s3://{0}/{1}/data/raw/".format(bucket_name, prefix),
    )
    train_data_path_param = ParameterString(
        name="train_data_path",
        default_value="s3://{0}/{1}/data/preprocessed/train/".format(
            bucket_name, prefix),
    )
    val_data_path_param = ParameterString(
        name="val_data_path",
        default_value="s3://{0}/{1}/data/preprocessed/val/".format(
            bucket_name, prefix),
    )
    output_path_param = ParameterString(
        name="output_path",
        default_value="s3://{0}/{1}/output/".format(bucket_name, prefix),
    )
    model_path_param = ParameterString(
        name="model_path",
        default_value="s3://{0}/{1}/output/sklearn/".format(
            bucket_name, prefix),
    )

    # The estimator and both models stage their code under the same location.
    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)

    # --- processing -------------------------------------------------------
    sklearn_processor = SKLearnProcessor(
        role=role,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.20.0",
        sagemaker_session=sagemaker_session,
    )
    processing_step = ProcessingStep(
        name="Processing",
        code=os.path.join(sip_dir, "preprocessor.py"),
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(
                input_name="raw_data",
                source=raw_data_path_param,
                destination="/opt/ml/processing/input",
            )
        ],
        outputs=[
            ProcessingOutput(
                output_name="train_data",
                source="/opt/ml/processing/train",
                destination=train_data_path_param,
            ),
            ProcessingOutput(
                output_name="val_data",
                source="/opt/ml/processing/val",
                destination=val_data_path_param,
            ),
            ProcessingOutput(
                output_name="model",
                source="/opt/ml/processing/model",
                destination=model_path_param,
            ),
        ],
        job_arguments=["--train-test-split-ratio", "0.2"],
    )
    processed = processing_step.properties.ProcessingOutputConfig.Outputs

    # --- training ---------------------------------------------------------
    xgboost_estimator = XGBoost(
        entry_point="training.py",
        source_dir=sip_dir,
        output_path=output_path_param,
        code_location=code_location,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )
    training_step = TrainingStep(
        name="Training",
        estimator=xgboost_estimator,
        inputs={
            "train": TrainingInput(
                s3_data=processed["train_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=processed["val_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # --- models and registration -------------------------------------------
    sklearn_model = SKLearnModel(
        name="sklearn-model",
        model_data=processed["model"].S3Output.S3Uri,
        entry_point="inference.py",
        source_dir=os.path.join(sip_dir, "sklearn_source_dir"),
        code_location=code_location,
        role=role,
        sagemaker_session=sagemaker_session,
        framework_version="0.20.0",
        py_version="py3",
    )
    xgboost_model = XGBoostModel(
        name="xgboost-model",
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        entry_point="inference.py",
        source_dir=os.path.join(sip_dir, "xgboost_source_dir"),
        code_location=code_location,
        framework_version="0.90-2",
        py_version="py3",
        role=role,
        sagemaker_session=sagemaker_session,
    )

    # Container order: the XGBoost model first, then the SKLearn model.
    pipeline_model = PipelineModel(
        [xgboost_model, sklearn_model],
        role,
        sagemaker_session=sagemaker_session,
    )
    step_register = RegisterModel(
        name="AbaloneRegisterModel",
        model=pipeline_model,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="windturbine",
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            raw_data_path_param,
            train_data_path_param,
            val_data_path_param,
            model_path_param,
            instance_type,
            instance_count,
            output_path_param,
        ],
        steps=[processing_step, training_step, step_register],
        sagemaker_session=sagemaker_session,
    )

    pipeline_arn_pattern = (
        rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}"
    )
    execution_arn_pattern = (
        rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/"
    )

    try:
        upserted = pipeline.upsert(role_arn=role)
        assert re.match(pipeline_arn_pattern, upserted["PipelineArn"])

        # The first start passes an explicit empty parameter dict, the second
        # passes no arguments at all.
        for start_kwargs in ({"parameters": {}}, {}):
            execution = pipeline.start(**start_kwargs)
            assert re.match(execution_arn_pattern, execution.arn)
    finally:
        try:
            pipeline.delete()
        except Exception:
            # Cleanup is best-effort: errors raised by delete are ignored.
            pass
# Example #23
# 0
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="TestPackageGroup",
    pipeline_name="TestPipeline",
    base_job_prefix="Test",
):
    """Build a SageMaker ML Pipeline instance that works on the abalone data.

    The pipeline preprocesses the input data, trains an XGBoost model,
    evaluates it, and registers the model only when the evaluation report's
    ``regression_metrics.mse.value`` is less than or equal to 6.0.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When ``None``,
            it is looked up with ``sagemaker.session.get_execution_role``.
        default_bucket: the bucket to use for storing the artifacts
        model_package_group_name: model package group the model is
            registered into.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix for the base job names and for the training
            output path.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # ---- pipeline execution parameters ------------------------------------
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount",
        default_value=1,
    )
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType",
        default_value="ml.m5.xlarge",
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType",
        default_value="ml.m5.xlarge",
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="PendingManualApproval",
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv",
    )

    # ---- feature engineering (processing step) ------------------------------
    step_process = ProcessingStep(
        name="PreprocessTestData",
        processor=SKLearnProcessor(
            framework_version="0.23-1",
            instance_type=processing_instance_type,
            instance_count=processing_instance_count,
            base_job_name=f"{base_job_prefix}/sklearn-test-preprocess",
            sagemaker_session=sagemaker_session,
            role=role,
        ),
        outputs=[
            ProcessingOutput(
                output_name="train",
                source="/opt/ml/processing/train",
            ),
            ProcessingOutput(
                output_name="validation",
                source="/opt/ml/processing/validation",
            ),
            ProcessingOutput(
                output_name="test",
                source="/opt/ml/processing/test",
            ),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )
    preprocess_outputs = step_process.properties.ProcessingOutputConfig.Outputs

    # ---- model training (training step) -------------------------------------
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/TestTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/test-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="TrainTestModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=preprocess_outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=preprocess_outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )
    trained_artifacts = step_train.properties.ModelArtifacts.S3ModelArtifacts

    # ---- model evaluation (processing step) ---------------------------------
    evaluation_report = PropertyFile(
        name="TestEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateTestModel",
        processor=ScriptProcessor(
            image_uri=image_uri,
            command=["python3"],
            instance_type=processing_instance_type,
            instance_count=1,
            base_job_name=f"{base_job_prefix}/script-test-eval",
            sagemaker_session=sagemaker_session,
            role=role,
        ),
        inputs=[
            ProcessingInput(
                source=trained_artifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=preprocess_outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="evaluation",
                source="/opt/ml/processing/evaluation",
            ),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # ---- model registration (conditionally executed) ------------------------
    # The metrics file is addressed through the S3 URI of the evaluation
    # step's first (and only) processing output.
    evaluation_output_uri = step_eval.arguments["ProcessingOutputConfig"][
        "Outputs"][0]["S3Output"]["S3Uri"]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(evaluation_output_uri),
            content_type="application/json",
        ))
    step_register = RegisterModel(
        name="RegisterTestModel",
        estimator=xgb_train,
        model_data=trained_artifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # ---- quality gate: register only when MSE <= 6.0 ------------------------
    step_cond = ConditionStep(
        name="CheckMSETestEvaluation",
        conditions=[
            ConditionLessThanOrEqualTo(
                left=JsonGet(
                    step=step_eval,
                    property_file=evaluation_report,
                    json_path="regression_metrics.mse.value",
                ),
                right=6.0,
            )
        ],
        if_steps=[step_register],
        else_steps=[],
    )

    # ---- pipeline instance ---------------------------------------------------
    return Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
# Example #24
# 0
def get_pipeline(
        region,
        sagemaker_project_arn=None,
        role=None,
        default_bucket='',
        pipeline_name='end-to-end-ml-sagemaker-pipeline',
        model_package_group_name='end-to-end-ml-sm-model-package-group',
        base_job_prefix='endtoendmlsm') -> Pipeline:
    """
    Builds, upserts and returns the SM Pipeline.

    The pipeline has three steps: an SKLearn processing step, an XGBoost
    training step fed by the processing outputs, and a RegisterModel step
    registering a PipelineModel that chains the SKLearn model and the XGBoost
    model (in that order). Before returning, the pipeline is upserted with
    ``role`` and its ARN is printed.

    :param region: The region passed to ``get_session``.
    :param sagemaker_project_arn: Not referenced in the body.
    :param role: The execution role, used for every step and for the upsert.
    :param default_bucket: The bucket where pipeline artifacts are stored;
        used to build the default S3 paths and passed to ``get_session``.
    :param pipeline_name: The name given to the pipeline.
    :param model_package_group_name: The model package group the model is
        registered into.
    :param base_job_prefix: Not referenced in the body; the artifact prefix
        is fixed to ``'endtoendmlsm'``.
    :return: A Pipeline instance.
    """

    bucket_name = default_bucket
    # NOTE(review): base_job_prefix is ignored in favour of this hard-coded
    # prefix (which equals its default) - confirm whether that is intended.
    prefix = 'endtoendmlsm'
    sagemaker_session = get_session(region, bucket_name)
    # NOTE(review): with the default default_bucket='' the paths below start
    # with 's3:///endtoendmlsm/...' - callers presumably always pass a bucket.

    # ---------------------
    # Processing parameters
    # ---------------------
    # The path to the raw data. This is a fixed public location: it does not
    # depend on bucket_name or prefix.
    raw_data_path = 's3://gianpo-public/endtoendml/data/raw/predmain_raw_data_header.csv'
    raw_data_path_param = ParameterString(name="raw_data_path",
                                          default_value=raw_data_path)
    # The output path to the training data.
    train_data_path = 's3://{0}/{1}/data/preprocessed/train/'.format(
        bucket_name, prefix)
    train_data_path_param = ParameterString(name="train_data_path",
                                            default_value=train_data_path)
    # The output path to the validation data.
    val_data_path = 's3://{0}/{1}/data/preprocessed/val/'.format(
        bucket_name, prefix)
    val_data_path_param = ParameterString(name="val_data_path",
                                          default_value=val_data_path)
    # The output path to the featurizer model.
    model_path = 's3://{0}/{1}/output/sklearn/'.format(bucket_name, prefix)
    model_path_param = ParameterString(name="model_path",
                                       default_value=model_path)
    # The instance type for the processing job.
    processing_instance_type_param = ParameterString(
        name="processing_instance_type", default_value='ml.m5.large')
    # The instance count for the processing job.
    processing_instance_count_param = ParameterInteger(
        name="processing_instance_count", default_value=1)
    # The train/test split ratio parameter.
    train_test_split_ratio_param = ParameterString(
        name="train_test_split_ratio", default_value='0.2')
    # -------------------
    # Training parameters
    # -------------------
    # XGB hyperparameters.
    max_depth_param = ParameterString(name="max_depth", default_value='3')
    eta_param = ParameterString(name="eta", default_value='0.1')
    gamma_param = ParameterString(name="gamma", default_value='0')
    min_child_weight_param = ParameterString(name="min_child_weight",
                                             default_value='1')
    objective_param = ParameterString(name="objective",
                                      default_value='binary:logistic')
    num_round_param = ParameterString(name="num_round", default_value='10')
    eval_metric_param = ParameterString(name="eval_metric",
                                        default_value='auc')
    # The instance type for the training job.
    training_instance_type_param = ParameterString(
        name="training_instance_type", default_value='ml.m5.xlarge')
    # The instance count for the training job.
    training_instance_count_param = ParameterInteger(
        name="training_instance_count", default_value=1)
    # The training output path for the model.
    output_path = 's3://{0}/{1}/output/'.format(bucket_name, prefix)
    output_path_param = ParameterString(name="output_path",
                                        default_value=output_path)
    # --------------------------
    # Register model parameters
    # --------------------------
    # The default instance type for deployment.
    deploy_instance_type_param = ParameterString(name="deploy_instance_type",
                                                 default_value='ml.m5.2xlarge')
    # The approval status for models added to the registry.
    model_approval_status_param = ParameterString(
        name="model_approval_status", default_value='PendingManualApproval')
    # --------------------------
    # Processing Step
    # --------------------------
    # NOTE(review): unlike the models below, the processor and the estimator
    # are not given sagemaker_session - confirm this is intended.
    sklearn_processor = SKLearnProcessor(
        role=role,
        instance_type=processing_instance_type_param,
        instance_count=processing_instance_count_param,
        framework_version='0.20.0')
    inputs = [
        ProcessingInput(input_name='raw_data',
                        source=raw_data_path_param,
                        destination='/opt/ml/processing/input')
    ]
    outputs = [
        ProcessingOutput(output_name='train_data',
                         source='/opt/ml/processing/train',
                         destination=train_data_path_param),
        ProcessingOutput(output_name='val_data',
                         source='/opt/ml/processing/val',
                         destination=val_data_path_param),
        ProcessingOutput(output_name='model',
                         source='/opt/ml/processing/model',
                         destination=model_path_param)
    ]
    code_path = os.path.join(BASE_DIR, 'dataprep/preprocess.py')
    processing_step = ProcessingStep(name='Processing',
                                     code=code_path,
                                     processor=sklearn_processor,
                                     inputs=inputs,
                                     outputs=outputs,
                                     job_arguments=[
                                         '--train-test-split-ratio',
                                         train_test_split_ratio_param
                                     ])
    # --------------------------
    # Training Step
    # --------------------------
    hyperparameters = {
        "max_depth": max_depth_param,
        "eta": eta_param,
        "gamma": gamma_param,
        "min_child_weight": min_child_weight_param,
        "silent": 0,
        "objective": objective_param,
        "num_round": num_round_param,
        "eval_metric": eval_metric_param
    }
    entry_point = 'train.py'
    source_dir = os.path.join(BASE_DIR, 'train/')
    # One code location is shared by the estimator and both models.
    code_location = 's3://{0}/{1}/code'.format(bucket_name, prefix)
    estimator = XGBoost(entry_point=entry_point,
                        source_dir=source_dir,
                        output_path=output_path_param,
                        code_location=code_location,
                        hyperparameters=hyperparameters,
                        instance_type=training_instance_type_param,
                        instance_count=training_instance_count_param,
                        framework_version="0.90-2",
                        py_version="py3",
                        role=role)
    training_step = TrainingStep(
        name='Training',
        estimator=estimator,
        inputs={
            'train':
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs['train_data'].S3Output.S3Uri,
                content_type='text/csv'),
            'validation':
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs['val_data'].S3Output.S3Uri,
                content_type='text/csv')
        })
    # --------------------------
    # Register Model Step
    # --------------------------
    # Model names carry a Unix-timestamp suffix taken at construction time.
    sklearn_model = SKLearnModel(
        name='end-to-end-ml-sm-skl-model-{0}'.format(str(int(time.time()))),
        model_data=processing_step.properties.ProcessingOutputConfig.
        Outputs['model'].S3Output.S3Uri,
        entry_point='inference.py',
        source_dir=os.path.join(BASE_DIR, 'deploy/sklearn/'),
        code_location=code_location,
        role=role,
        sagemaker_session=sagemaker_session,
        framework_version='0.20.0',
        py_version='py3')
    xgboost_model = XGBoostModel(
        name='end-to-end-ml-sm-xgb-model-{0}'.format(str(int(time.time()))),
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        entry_point='inference.py',
        source_dir=os.path.join(BASE_DIR, 'deploy/xgboost/'),
        code_location=code_location,
        framework_version='0.90-2',
        py_version='py3',
        role=role,
        sagemaker_session=sagemaker_session)
    pipeline_model_name = 'end-to-end-ml-sm-xgb-skl-pipeline-{0}'.format(
        str(int(time.time())))
    # Container order: the SKLearn model first, then the XGBoost model.
    pipeline_model = PipelineModel(name=pipeline_model_name,
                                   role=role,
                                   models=[sklearn_model, xgboost_model],
                                   sagemaker_session=sagemaker_session)

    register_model_step = RegisterModel(
        name='RegisterModel',
        content_types=['text/csv'],
        response_types=['application/json', 'text/csv'],
        inference_instances=[deploy_instance_type_param, 'ml.m5.large'],
        transform_instances=['ml.c5.4xlarge'],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status_param,
        model=pipeline_model)
    # --------------------------
    # Pipeline
    # --------------------------

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            raw_data_path_param, train_data_path_param, val_data_path_param,
            model_path_param, processing_instance_type_param,
            processing_instance_count_param, train_test_split_ratio_param,
            max_depth_param, eta_param, gamma_param, min_child_weight_param,
            objective_param, num_round_param, eval_metric_param,
            training_instance_type_param, training_instance_count_param,
            output_path_param, deploy_instance_type_param,
            model_approval_status_param
        ],
        steps=[processing_step, training_step, register_model_step],
        sagemaker_session=sagemaker_session,
    )
    # Side effect: the pipeline definition is created/updated in the service
    # before the instance is handed back to the caller.
    response = pipeline.upsert(role_arn=role)
    print(response["PipelineArn"])
    return pipeline