Example #1
    def __init__(
            self,
            endpoint_name,
            sagemaker_session=None,
            serializer=LibSVMSerializer(),
            deserializer=CSVDeserializer(),
    ):
        """Initialize an ``XGBoostPredictor``.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference on.
            sagemaker_session (sagemaker.session.Session): Session object which manages
                interactions with Amazon SageMaker APIs and any other AWS services needed.
                If not specified, the estimator creates one using the default AWS configuration
                chain.
            serializer (sagemaker.serializers.BaseSerializer): Optional. The
                default serializer serializes input data to LibSVM format.
            deserializer (sagemaker.deserializers.BaseDeserializer): Optional.
                The default deserializer parses the text/csv response into a
                Python list.
        """
        super(XGBoostPredictor, self).__init__(
            endpoint_name,
            sagemaker_session,
            serializer=serializer,
            deserializer=deserializer,
        )
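
A minimal usage sketch for this predictor, assuming an already-deployed XGBoost endpoint (the endpoint name below is hypothetical). LibSVMSerializer passes a LibSVM-formatted string through as the request body, and CSVDeserializer parses the text/csv response into a list of rows:

# Hedged sketch: "my-xgb-endpoint" is a hypothetical endpoint name.
predictor = XGBoostPredictor("my-xgb-endpoint")

# A sparse LibSVM record (index:value pairs); the label can be omitted
# for inference. The result is a list of rows parsed from text/csv.
result = predictor.predict("1:0.5 7:1.0 12:0.3")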
Example #2
def main(deploy_data, train_data):
    inference_id_prefix = 'sts_'  # Comes from deploymodel.py
    outputs = {'inferences': []}

    # AWS-specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # Load a predictor using the endpoint name
    predictor = SKLearnPredictor(
        deploy_data['endpoint']['name'],
        sagemaker_session=sm_session,
        serializer=CSVSerializer(),  # necessary, or MQM won't work
        deserializer=CSVDeserializer()  # necessary, or MQM won't work
    )

    # read test data
    test_data = load_dataset(train_data['train']['test'],
                             'test.csv',
                             sagemaker_session=sm_session)
    print(f"Loadding {train_data['train']['test']}")

    # remove labels in the test dataset
    test_data.drop(test_data.columns[0], axis=1, inplace=True)

    # Iterate over the test data and call the endpoint for each row
    x_test_rows = test_data.values
    print(f"Sending trafic to the endpoint: {deploy_data['endpoint']['name']}")
    with progressbar.ProgressBar(max_value=len(x_test_rows)) as bar:
        for index, x_test_row in enumerate(x_test_rows, start=1):
            # Auto-generate an inference-id to track the request/response
            # in the captured data
            inference_id = '{}{}'.format(inference_id_prefix, index)

            result = predictor.predict(x_test_row, inference_id=inference_id)

            outputs['inferences'].append({
                inference_id: {
                    'input': x_test_row.tolist(),
                    'result': result
                }
            })

            # show progress
            bar.update(index)

    with open('testendpoint_out.json', 'w') as f:
        json.dump(outputs, f)
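
The get_sm_session helper used above is not shown in this example. A minimal sketch of what it plausibly does, assuming it simply wires a boto3 session into the SageMaker SDK (the helper itself is an assumption, not part of the SDK):

import boto3
import sagemaker


def get_sm_session(region, profile_name=None,
                   aws_access_key_id=None, aws_secret_access_key=None):
    # Assumed helper: build a boto3 session and derive SageMaker clients from it.
    b3_session = boto3.Session(
        region_name=region,
        profile_name=profile_name,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    sm_client = b3_session.client('sagemaker')
    sm_runtime = b3_session.client('sagemaker-runtime')
    sm_session = sagemaker.session.Session(
        boto_session=b3_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime,
    )
    return b3_session, sm_client, sm_runtime, sm_session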
Example #3
    def __init__(self, endpoint_name, sagemaker_session=None):
        """Initialize an ``XGBoostPredictor``.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference on.
            sagemaker_session (sagemaker.session.Session): Session object which manages
                interactions with Amazon SageMaker APIs and any other AWS services needed.
                If not specified, the estimator creates one using the default AWS configuration
                chain.
        """
        super(XGBoostPredictor, self).__init__(endpoint_name,
                                               sagemaker_session,
                                               LibSVMSerializer(),
                                               CSVDeserializer())
Example #4
def test_predict_invocation_with_target_variant_local_mode(
        sagemaker_session, multi_variant_endpoint):

    if sagemaker_session._region_name is None:
        sagemaker_session._region_name = DEFAULT_REGION

    predictor = Predictor(
        endpoint_name=multi_variant_endpoint.endpoint_name,
        sagemaker_session=sagemaker_session,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
    )

    # Validate that no exception is raised when the target_variant is specified.
    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_1)
    predictor.predict(TEST_CSV_DATA, target_variant=TEST_VARIANT_2)
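
For context, the multi_variant_endpoint fixture refers to an endpoint whose config declares several production variants; target_variant pins a request to one of them. A hedged boto3 sketch of such a config (all names and weights are illustrative, not taken from the test fixture):

import boto3

sm_client = boto3.client('sagemaker')

# Two variants behind one endpoint; requests are split by weight unless
# the caller pins a specific variant via target_variant.
sm_client.create_endpoint_config(
    EndpointConfigName='my-multi-variant-config',  # hypothetical name
    ProductionVariants=[
        {
            'VariantName': 'Variant1',
            'ModelName': 'model-a',  # hypothetical model
            'InitialInstanceCount': 1,
            'InstanceType': 'ml.m5.large',
            'InitialVariantWeight': 0.5,
        },
        {
            'VariantName': 'Variant2',
            'ModelName': 'model-b',  # hypothetical model
            'InitialInstanceCount': 1,
            'InstanceType': 'ml.m5.large',
            'InitialVariantWeight': 0.5,
        },
    ],
)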
Example #5
def main():

    image_name = "sagemaker-sklearn-rf-regressor-local"

    # Prepare data for model inference - we use the California housing dataset
    print('Preparing data for model inference')
    data = fetch_california_housing()
    X_train, X_test, y_train, y_test = train_test_split(data.data,
                                                        data.target,
                                                        test_size=0.25,
                                                        random_state=42)

    # we don't train a model here, so we only need the test data
    testX = pd.DataFrame(X_test, columns=data.feature_names)

    # Download a pre-trained model file
    print('Downloading a pre-trained model file')
    s3.download_file(
        'aws-ml-blog',
        'artifacts/scikit_learn_bring_your_own_model/model.joblib',
        'model.joblib')

    # Creating a model.tar.gz file
    tar = tarfile.open('model.tar.gz', 'w:gz')
    tar.add('model.joblib')
    tar.close()

    model = Model(image_uri=image_name,
                  role=DUMMY_IAM_ROLE,
                  model_data='file://./model.tar.gz')

    print('Deploying endpoint in local mode')
    endpoint = model.deploy(initial_instance_count=1,
                            instance_type='local',
                            endpoint_name="my-local-endpoint")

    predictor = Predictor(endpoint_name="my-local-endpoint",
                          sagemaker_session=sagemaker_session,
                          serializer=CSVSerializer(),
                          deserializer=CSVDeserializer())

    predictions = predictor.predict(testX[data.feature_names].head(5).to_csv(
        header=False, index=False))
    print(f"Predictions: {predictions}")

    print('Deleting the endpoint to stop incurring charges (relevant in cloud mode).')
    predictor.delete_endpoint()
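
This example uses s3, sagemaker_session, and DUMMY_IAM_ROLE without showing their setup. A plausible sketch, assuming SageMaker local mode (the role ARN is a placeholder; local mode does not validate IAM roles):

import boto3
from sagemaker.local import LocalSession

# Local mode runs the serving container on the local machine via Docker.
sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}

# Plain S3 client, used to download the public sample model artifact.
s3 = boto3.client('s3')

# Placeholder role ARN; assumed constant, not validated in local mode.
DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/dummy-role'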
Example #6
def test_predict_call_with_csv():
    sagemaker_session = ret_csv_sagemaker_session()
    predictor = Predictor(ENDPOINT,
                          sagemaker_session,
                          serializer=CSVSerializer(),
                          deserializer=CSVDeserializer())

    data = [1, 2]
    result = predictor.predict(data)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": CSV_CONTENT_TYPE,
        "Body": "1,2",
        "ContentType": CSV_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == [["1", "2", "3"]]
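
The expected request body and result in this test follow directly from how the CSV serializer and deserializer transform payloads; a small standalone sketch:

from io import BytesIO

from sagemaker.deserializers import CSVDeserializer
from sagemaker.serializers import CSVSerializer

# The serializer turns a Python sequence into a CSV string for the request body.
assert CSVSerializer().serialize([1, 2]) == "1,2"

# The deserializer reads the text/csv response body into a list of rows.
assert CSVDeserializer().deserialize(BytesIO(b"1,2,3"), "text/csv") == [["1", "2", "3"]]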
Example #7
        production_variants = endpoint_config["ProductionVariants"]
        return [d["ModelName"] for d in production_variants]

    @property
    def content_type(self):
        """The MIME type of the data sent to the inference endpoint."""
        return self.serializer.CONTENT_TYPE

    @property
    def accept(self):
        """The content type(s) that are expected from the inference endpoint."""
        return self.deserializer.ACCEPT

    @property
    def endpoint(self):
        """Deprecated attribute. Please use endpoint_name."""
        renamed_warning("The endpoint attribute")
        return self.endpoint_name


csv_serializer = deprecated_serialize(CSVSerializer(), "csv_serializer")
json_serializer = deprecated_serialize(JSONSerializer(), "json_serializer")
npy_serializer = deprecated_serialize(NumpySerializer(), "npy_serializer")
csv_deserializer = deprecated_deserialize(CSVDeserializer(),
                                          "csv_deserializer")
json_deserializer = deprecated_deserialize(JSONDeserializer(),
                                           "json_deserializer")
numpy_deserializer = deprecated_deserialize(NumpyDeserializer(),
                                            "numpy_deserializer")
RealTimePredictor = deprecated_class(Predictor, "RealTimePredictor")
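
A short usage sketch of the properties above; the printed values assume the CSV serializer and deserializer defaults (serializer.CONTENT_TYPE and deserializer.ACCEPT):

predictor = Predictor(
    "my-endpoint",  # hypothetical endpoint name
    serializer=CSVSerializer(),
    deserializer=CSVDeserializer(),
)

print(predictor.content_type)  # "text/csv", from serializer.CONTENT_TYPE
print(predictor.accept)        # ("text/csv",), from deserializer.ACCEPT

# The deprecated aliases still work but emit a rename warning:
legacy = RealTimePredictor("my-endpoint")  # warns to use Predictor instead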
Example #8
def main(datacapture=False):
    # Load config from environment and set required defaults
    # AWS-specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
    )
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    MODEL_PACKAGE_GROUP_NAME = os.getenv(
        'MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    # define useful const's
    bucket = sm_session.default_bucket()
    endpoint_name = "{}-sklearn-{}".format(
        BASE_JOB_PREFIX,
        datetime.datetime.now().strftime("%Y%m%d%H%M")
    )
    prefix = "{}/{}".format(BASE_JOB_PREFIX, endpoint_name)
    data_capture_prefix = "{}/datacapture".format(prefix)
    s3_capture_upload_path = "s3://{}/{}".format(bucket, data_capture_prefix)
    # outputs is a dict to save to json
    outputs = dict()

    if datacapture is True:
        # if data capture is enabled, output the S3 URI for the captured data
        outputs['monitor'] = {
            's3_capture_upload_path': s3_capture_upload_path
        }

    # get the latest approved version in the model package group
    model_package_arn = get_approved_package(
        MODEL_PACKAGE_GROUP_NAME, sm_client)
    _l.info(f"Latest approved model package: {model_package_arn}")
    model_info = sm_client.describe_model_package(
        ModelPackageName=model_package_arn)
    outputs['model_info'] = model_info
    model_uri = model_info.get(
        'InferenceSpecification')['Containers'][0]['ModelDataUrl']
    _l.info(f"Model data uri: {model_uri}")

    sk_model = SKLearnModel(
        model_uri,  # s3 uri for the model.tar.gz
        ROLE_ARN,   # sagemaker role to be used
        'model_loader.py',  # script to load the model
        framework_version='0.23-1'
    )

    data_capture_config = None
    if datacapture is True:
        # if data capture was enabled, generate the required config
        _l.info("Enabling data capture as requested")
        _l.info(f"s3_capture_upload_path: {s3_capture_upload_path}")
        data_capture_config = DataCaptureConfig(
            enable_capture=True, sampling_percentage=100, 
            destination_s3_uri=s3_capture_upload_path,
            capture_options=["REQUEST", "RESPONSE"],
            sagemaker_session=sm_session
        )

    # Deploy the endpoint
    predictor = sk_model.deploy(
        instance_type="ml.m5.xlarge", 
        initial_instance_count=1,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        data_capture_config=data_capture_config,
        endpoint_name=endpoint_name
    )

    _l.info(f"Endpoint name: {predictor.endpoint_name}")
    outputs['endpoint'] = {
        'name': endpoint_name,
        'config_name': predictor.endpoint_name  # same value as the endpoint name
    }
    outputs['model_info'].update({"name": sk_model.name})
    # ENDPOINT deploy done

    # save useful outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
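
The json.dump call above relies on a json_default helper that is not shown; describe_model_package responses contain datetime objects, which the standard JSON encoder rejects. A minimal sketch of such a helper (the name matches the call above; the implementation is an assumption):

import datetime


def json_default(obj):
    # Assumed helper: make datetimes JSON-serializable; reject everything else.
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")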
Example #9
from sagemaker.model_monitor import DataCaptureConfig

s3_capture_path = "s3://monitoring/xgb-churn-data"


data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_path
)


from sagemaker.deserializers import CSVDeserializer

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.large",
    endpoint_name="xgb-churn-monitor",
    data_capture_config=data_capture_config,
    deserializer=CSVDeserializer(),
)
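
Once traffic reaches the endpoint, captured request/response pairs are written as JSON Lines under the destination URI, partitioned by endpoint name, variant, and timestamp. A hedged sketch of sending a request (the feature row is illustrative):

from sagemaker.serializers import CSVSerializer

# The deploy call above only set a deserializer; attach a CSV serializer
# so the request body goes out as text/csv.
predictor.serializer = CSVSerializer()
predictor.predict("0.1,0.2,0.3")  # illustrative feature row

# Capture files then appear under a path like (illustrative):
# s3://monitoring/xgb-churn-data/xgb-churn-monitor/AllTraffic/2024/01/01/10/<file>.jsonl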
def csv_deserializer():
    return CSVDeserializer()