def disable_data_capture(self):
    """Disables data capture by updating DataCaptureConfig.

    This function updates the DataCaptureConfig for the Predictor's associated Amazon SageMaker
    Endpoint to disable data capture. For a more customized experience, refer to
    update_data_capture_config, instead.
    """
    self.update_data_capture_config(data_capture_config=DataCaptureConfig(
        enable_capture=False, sagemaker_session=self.sagemaker_session))
Example #2
    def deploy_model(self):

        if self.monitor:
            from sagemaker.model_monitor import DataCaptureConfig

            data_capture_config = DataCaptureConfig(
                enable_capture=True,
                sampling_percentage=100,
                destination_s3_uri="s3://{}/ezsmdeploy/model-{}/datacapture".format(
                    self.bucket, self.name
                ),
            )
        else:
            data_capture_config = None

        self.endpoint_name = "ezsmdeploy-endpoint-" + self.name
        self.predictor = self.sagemakermodel.deploy(
            initial_instance_count=self.instance_count,
            instance_type=self.instance_type,
            accelerator_type=self.ei,
            endpoint_name=self.endpoint_name,
            update_endpoint=False,  # SageMaker SDK v1 parameter; removed in v2
            wait=self.wait,
            data_capture_config=data_capture_config,
        )

def endpoint_name(sagemaker_session):
    endpoint_name = unique_name_from_base("model-quality-monitor-integ")
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )

    xgb_image = image_uris.retrieve("xgboost",
                                    sagemaker_session.boto_region_name,
                                    version="1",
                                    image_scope="inference")

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            hours=2):
        xgb_model = Model(
            model_data=xgb_model_data,
            image_uri=xgb_image,
            name=endpoint_name,  # model name
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )
        xgb_model.deploy(
            INSTANCE_COUNT,
            INSTANCE_TYPE,
            endpoint_name=endpoint_name,
            data_capture_config=DataCaptureConfig(
                enable_capture=True, sagemaker_session=sagemaker_session),
        )
        yield endpoint_name

def enable_data_capture(self):
    """Enables data capture by updating DataCaptureConfig.

    This function updates the DataCaptureConfig for the Predictor's associated Amazon SageMaker
    Endpoint to enable data capture. For a more customized experience, refer to
    update_data_capture_config, instead.
    """
    self.update_data_capture_config(data_capture_config=DataCaptureConfig(
        enable_capture=True, sagemaker_session=self.sagemaker_session))
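
These enable/disable helpers are convenience methods on the Predictor class; each wraps update_data_capture_config and triggers an endpoint update. A minimal usage sketch, assuming an already-deployed endpoint (the name "my-endpoint" is a placeholder):

from sagemaker.predictor import Predictor

# Attach to an existing endpoint; "my-endpoint" is a placeholder name.
predictor = Predictor(endpoint_name="my-endpoint")

# Both calls trigger an endpoint update, which takes several minutes.
predictor.enable_data_capture()
predictor.disable_data_capture()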
Example #5
def test_to_request_dict_returns_correct_default_params_when_optionals_not_provided():
    data_capture_config = DataCaptureConfig(
        enable_capture=DEFAULT_ENABLE_CAPTURE, destination_s3_uri=DEFAULT_DESTINATION_S3_URI
    )

    assert data_capture_config.enable_capture == DEFAULT_ENABLE_CAPTURE
    assert data_capture_config.sampling_percentage == DEFAULT_SAMPLING_PERCENTAGE
    assert data_capture_config.destination_s3_uri == DEFAULT_DESTINATION_S3_URI
    assert data_capture_config.kms_key_id == DEFAULT_KMS_KEY_ID
    assert data_capture_config.csv_content_types == DEFAULT_CSV_CONTENT_TYPES
    assert data_capture_config.json_content_types == DEFAULT_JSON_CONTENT_TYPES
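
The defaults asserted above map onto the DataCaptureConfig block that the config ultimately contributes to a CreateEndpointConfig request (the test name refers to the config's internal _to_request_dict). A sketch of that shape with illustrative values, matching the structure the EndpointConfigStep test below asserts:

# Illustrative request structure; values here are examples, not SDK defaults.
{
    "EnableCapture": True,
    "InitialSamplingPercentage": 100,
    "DestinationS3Uri": "s3://my-bucket/datacapture",  # placeholder URI
    "CaptureOptions": [{"CaptureMode": "Input"}, {"CaptureMode": "Output"}],
    "CaptureContentTypeHeader": {
        "CsvContentTypes": ["text/csv"],
        "JsonContentTypes": ["application/json"],
    },
}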
Example #6
def test_init_when_optionals_not_provided():
    sagemaker_session = Mock()
    sagemaker_session.default_bucket.return_value = DEFAULT_BUCKET_NAME

    data_capture_config = DataCaptureConfig(
        enable_capture=DEFAULT_ENABLE_CAPTURE,
        sagemaker_session=sagemaker_session)

    assert data_capture_config.enable_capture == DEFAULT_ENABLE_CAPTURE
    assert data_capture_config.sampling_percentage == DEFAULT_SAMPLING_PERCENTAGE
    assert data_capture_config.destination_s3_uri == DEFAULT_DESTINATION_S3_URI
    assert data_capture_config.kms_key_id == DEFAULT_KMS_KEY_ID
    assert data_capture_config.csv_content_types == DEFAULT_CSV_CONTENT_TYPES
    assert data_capture_config.json_content_types == DEFAULT_JSON_CONTENT_TYPES

def test_endpoint_config_step_creation(pca_model):
    data_capture_config = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri='s3://sagemaker/datacapture')
    step = EndpointConfigStep(
        'Endpoint Config',
        endpoint_config_name='MyEndpointConfig',
        model_name='pca-model',
        initial_instance_count=1,
        instance_type='ml.p2.xlarge',
        data_capture_config=data_capture_config,
        tags=DEFAULT_TAGS,
    )
    assert step.to_dict() == {
        'Type': 'Task',
        'Parameters': {
            'EndpointConfigName': 'MyEndpointConfig',
            'ProductionVariants': [{
                'InitialInstanceCount': 1,
                'InstanceType': 'ml.p2.xlarge',
                'ModelName': 'pca-model',
                'VariantName': 'AllTraffic'
            }],
            'DataCaptureConfig': {
                'EnableCapture': True,
                'InitialSamplingPercentage': 100,
                'DestinationS3Uri': 's3://sagemaker/datacapture',
                'CaptureOptions': [
                    {'CaptureMode': 'Input'},
                    {'CaptureMode': 'Output'}
                ],
                'CaptureContentTypeHeader': {
                    'CsvContentTypes': ['text/csv'],
                    'JsonContentTypes': ['application/json']
                }
            },
            'Tags': DEFAULT_TAGS_LIST
        },
        'Resource': 'arn:aws:states:::sagemaker:createEndpointConfig',
        'End': True
    }
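
For context: EndpointConfigStep is not part of the SageMaker SDK itself; it comes from the AWS Step Functions Data Science SDK, while DataCaptureConfig still comes from sagemaker.model_monitor. A minimal import sketch, assuming both packages are installed:

from sagemaker.model_monitor import DataCaptureConfig
from stepfunctions.steps import EndpointConfigStep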
Example #8
def test_init_when_non_defaults_provided():
    data_capture_config = DataCaptureConfig(
        enable_capture=NON_DEFAULT_ENABLE_CAPTURE,
        sampling_percentage=NON_DEFAULT_SAMPLING_PERCENTAGE,
        destination_s3_uri=NON_DEFAULT_DESTINATION_S3_URI,
        kms_key_id=NON_DEFAULT_KMS_KEY_ID,
        csv_content_types=NON_DEFAULT_CSV_CONTENT_TYPES,
        json_content_types=NON_DEFAULT_JSON_CONTENT_TYPES,
    )

    assert data_capture_config.enable_capture == NON_DEFAULT_ENABLE_CAPTURE
    assert data_capture_config.sampling_percentage == NON_DEFAULT_SAMPLING_PERCENTAGE
    assert data_capture_config.destination_s3_uri == NON_DEFAULT_DESTINATION_S3_URI
    assert data_capture_config.kms_key_id == NON_DEFAULT_KMS_KEY_ID
    assert data_capture_config.csv_content_types == NON_DEFAULT_CSV_CONTENT_TYPES
    assert data_capture_config.json_content_types == NON_DEFAULT_JSON_CONTENT_TYPES
Example #9
def capture():
    # Change parameters as you would like - adjust the sampling percentage,
    # choose to capture the request, the response, or both.
    # Learn more in the SageMaker Model Monitor documentation.
    data_capture_config = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=50,
        destination_s3_uri=s3_capture_upload_path,
        kms_key_id=None,
        capture_options=["REQUEST", "RESPONSE"],
        csv_content_types=["text/csv"],
        json_content_types=["application/json"])

    # Apply the new configuration and wait for the endpoint update to finish.
    # Note: RealTimePredictor is the SageMaker SDK v1 class; in v2 it was
    # renamed Predictor (see the later examples on this page).
    predictor = RealTimePredictor(endpoint=endpoint_name)
    predictor.update_data_capture_config(data_capture_config=data_capture_config)
    sm_session.wait_for_endpoint(endpoint=endpoint_name)
def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
        sagemaker_session, tensorflow_inference_latest_version):
    endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR,
                          "tensorflow-serving-test-model.tar.gz"),
        key_prefix="tensorflow-serving/models",
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        model = TensorFlowModel(
            model_data=model_data,
            role=ROLE,
            framework_version=tensorflow_inference_latest_version,
            sagemaker_session=sagemaker_session,
        )
        destination_s3_uri = os.path.join("s3://",
                                          sagemaker_session.default_bucket(),
                                          endpoint_name, "custom")
        predictor = model.deploy(
            initial_instance_count=INSTANCE_COUNT,
            instance_type=INSTANCE_TYPE,
            endpoint_name=endpoint_name,
            data_capture_config=DataCaptureConfig(
                enable_capture=True,
                sampling_percentage=CUSTOM_SAMPLING_PERCENTAGE,
                destination_s3_uri=destination_s3_uri,
                capture_options=CUSTOM_CAPTURE_OPTIONS,
                csv_content_types=CUSTOM_CSV_CONTENT_TYPES,
                json_content_types=CUSTOM_JSON_CONTENT_TYPES,
                sagemaker_session=sagemaker_session,
            ),
        )

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint_name)

        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"])

        assert endpoint_config_desc["DataCaptureConfig"]["EnableCapture"]
        assert (endpoint_config_desc["DataCaptureConfig"]
                ["InitialSamplingPercentage"] == CUSTOM_SAMPLING_PERCENTAGE)
        assert endpoint_config_desc["DataCaptureConfig"]["CaptureOptions"] == [
            {
                "CaptureMode": "Input"
            }
        ]
        assert (endpoint_config_desc["DataCaptureConfig"]
                ["CaptureContentTypeHeader"]["CsvContentTypes"] ==
                CUSTOM_CSV_CONTENT_TYPES)
        assert (endpoint_config_desc["DataCaptureConfig"]
                ["CaptureContentTypeHeader"]["JsonContentTypes"] ==
                CUSTOM_JSON_CONTENT_TYPES)

        predictor.disable_data_capture()

        # Wait for endpoint to finish updating
        # Endpoint update takes ~7min. 25 retries * 60s sleeps = 25min timeout
        for _ in retries(
                max_retry_count=25,
                exception_message_prefix="Waiting for 'InService' endpoint status",
                seconds_to_sleep=60,
        ):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=predictor.endpoint_name)
            if new_endpoint["EndpointStatus"] == "InService":
                break

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint_name)

        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"])

        assert not endpoint_config_desc["DataCaptureConfig"]["EnableCapture"]
Example #11
def main(datacapture=False):
    # Load config from environment and set required defaults
    # AWS-specific settings
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
    )
    # note: os.getenv evaluates the default eagerly, so get_execution_role()
    # runs even when AWS_ROLE is set, and it fails outside a SageMaker context
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    MODEL_PACKAGE_GROUP_NAME = os.getenv(
        'MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    # define useful const's
    bucket = sm_session.default_bucket()
    endpoint_name = "{}-sklearn-{}".format(
        BASE_JOB_PREFIX,
        datetime.datetime.now().strftime("%Y%m%d%H%M")
    )
    prefix = "{}/{}".format(BASE_JOB_PREFIX, endpoint_name)
    data_capture_prefix = "{}/datacapture".format(prefix)
    s3_capture_upload_path = "s3://{}/{}".format(bucket, data_capture_prefix)
    # outputs is a dict to save to json
    outputs = dict()

    if datacapture is True:
        # if data capture is enabled, record the S3 URI where captures will land
        outputs['monitor'] = {
            's3_capture_upload_path': s3_capture_upload_path
        }

    # get the latest approved version in the model package group
    model_package_arn = get_approved_package(
        MODEL_PACKAGE_GROUP_NAME, sm_client)
    _l.info(f"Latest approved model package: {model_package_arn}")
    model_info = sm_client.describe_model_package(
        ModelPackageName=model_package_arn)
    outputs['model_info'] = model_info
    model_uri = model_info.get(
        'InferenceSpecification')['Containers'][0]['ModelDataUrl']
    _l.info(f"Model data uri: {model_uri}")

    sk_model = SKLearnModel(
        model_uri,  # s3 uri for the model.tar.gz
        ROLE_ARN,   # sagemaker role to be used
        'model_loader.py',  # script to load the model
        framework_version='0.23-1'
    )

    data_capture_config = None
    if datacapture is True:
        # if data capture was requested, generate the required config
        _l.info("Enabling data capture as requested")
        _l.info(f"s3_capture_upload_path: {s3_capture_upload_path}")
        data_capture_config = DataCaptureConfig(
            enable_capture=True, sampling_percentage=100, 
            destination_s3_uri=s3_capture_upload_path,
            capture_options=["REQUEST", "RESPONSE"],
            sagemaker_session=sm_session
        )

    # Deploy the endpoint
    predictor = sk_model.deploy(
        instance_type="ml.m5.xlarge", 
        initial_instance_count=1,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        data_capture_config=data_capture_config,
        endpoint_name=endpoint_name
    )

    _l.info(f"Endpoint name: {predictor.endpoint_name}")
    outputs['endpoint'] = {
        'name': endpoint_name,
        # predictor.endpoint_name is the endpoint name, not the endpoint config
        # name; resolve the actual config name via describe_endpoint if needed
        'config_name': predictor.endpoint_name,
    }
    outputs['model_info'].update({"name": sk_model.name})
    # ENDPOINT deploy done

    # save useful outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
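
A minimal invocation sketch for the script above (the --datacapture flag is hypothetical; the original script's CLI is not shown in this excerpt):

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    # Hypothetical flag; maps directly onto main()'s datacapture parameter.
    parser.add_argument('--datacapture', action='store_true', default=False)
    args = parser.parse_args()
    main(datacapture=args.datacapture)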
Example #12
sagemaker_session = sagemaker.session.Session(
    boto_session=boto_session,
    sagemaker_client=sagemaker_boto_client
)

# Enable real-time inference data capture

s3_capture_upload_path = f's3://{bucket}/{prefix}/endpoint-data-capture/' #example: s3://bucket-name/path/to/endpoint-data-capture/

# Change parameters as you would like - adjust the sampling percentage,
#  choose to capture the request, the response, or both
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=25,
    destination_s3_uri=s3_capture_upload_path,
    kms_key_id=None,
    capture_options=["REQUEST", "RESPONSE"],
    csv_content_types=["text/csv"],
    json_content_types=["application/json"]
)

# Now it is time to apply the new configuration
predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=sagemaker_session)
predictor.update_data_capture_config(data_capture_config=data_capture_config)

print('Created Predictor at endpoint {}'.format(endpoint_name))

baseline_data_uri = args.baseline_data_uri  # e.g. 's3://bucketname/path/to/baseline/data' - where your validation data lives
baseline_results_uri = f's3://{bucket}/{prefix}/baseline/results'  # where the baselining results will be stored

print('Baseline data is at {}'.format(baseline_data_uri))
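
The baseline URIs above are inputs to Model Monitor's baselining step, which this snippet stops short of. A hedged sketch of that next step (the DefaultModelMonitor settings here are assumptions, not taken from the source):

from sagemaker import get_execution_role
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

my_monitor = DefaultModelMonitor(
    role=get_execution_role(),  # assumes execution inside SageMaker
    instance_count=1,
    instance_type='ml.m5.xlarge',
)

# Suggest baseline statistics and constraints from the validation data.
my_monitor.suggest_baseline(
    baseline_dataset=baseline_data_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
)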
Example #13
from sagemaker.model_monitor import DataCaptureConfig

s3_capture_path = "s3://monitoring/xgb-churn-data"


data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_path
)


from sagemaker.deserializers import CSVDeserializer

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.large",
    endpoint_name="xgb-churn-monitor",
    data_capture_config=data_capture_config,
    deserializer=CSVDeserializer(),
)
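
Once requests flow to the endpoint, capture files arrive under the destination prefix as JSONL objects. A quick verification sketch using boto3 (assumes the s3_capture_path defined above and that some traffic has already been captured):

import boto3

# s3_capture_path was "s3://monitoring/xgb-churn-data" above.
bucket, _, prefix = s3_capture_path.replace("s3://", "").partition("/")

s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
for obj in response.get("Contents", []):
    print(obj["Key"])  # capture files are .jsonl, partitioned by endpoint/variant/date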

        'Containers': check_autopilot_job_status.output()['Payload']['InferenceContainers'],
        'ModelName': execution_input['ModelName'],
        'ExecutionRoleArn': sagemaker_exec_role
    })

endpoint_config_step = EndpointConfigStep(
    'CreateModelEndpointConfig',
    endpoint_config_name=execution_input['EndpointConfigName'],
    model_name=execution_input['ModelName'],
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    data_capture_config=DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
    ))

endpoint_step = EndpointStep(
    'UpdateModelEndpoint',
    endpoint_name=execution_input['EndpointName'],
    endpoint_config_name=execution_input['EndpointConfigName'],
    update=False)  # update=False issues CreateEndpoint rather than UpdateEndpoint

# define Amazon CodeBuild Step Functions Task
deploy_rest_api_task = Task(
    'DeployRestAPI',
    resource='arn:aws:states:::codebuild:startBuild.sync',
    parameters={
        'ProjectName':
        utils.get_api_codebuild_project(),