Ejemplo n.º 1
0
def test_sagemakermodel_passes_correct_params_to_scala():
    """Verify that values given to the SageMakerModel constructor can be read back.

    The model is built with ``EndpointCreationPolicy.DO_NOT_CREATE`` and without
    an ``existingEndpointName``; the test then compares the attributes exposed
    by the model against the values that were passed in.
    """
    expected_image = "model-abc-123"
    expected_path = S3DataPath("my-bucket", "model-abc-123")
    expected_role = "role-789"
    expected_instance_type = "c4.8xlarge"

    model = SageMakerModel(
        endpointInstanceType=expected_instance_type,
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        modelImage=expected_image,
        modelPath=expected_path,
        modelEnvironmentVariables=None,
        modelExecutionRoleARN=expected_role,
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )

    # Each value handed to the constructor must be visible on the model.
    assert model.modelImage == expected_image
    assert model.modelPath.bucket == expected_path.bucket
    assert model.modelExecutionRoleARN == expected_role
    assert model.endpointInstanceType == expected_instance_type
    # No endpoint name was supplied, so none should be reported.
    assert model.existingEndpointName is None
Ejemplo n.º 2
0
def test_sagemakermodel_can_do_resource_cleanup():
    """Verify that a model's created resources can be handed to the cleanup helper.

    Builds a SageMakerModel that refers to an existing endpoint name, obtains
    its SageMaker client and its created-resources object, and passes the
    latter to ``SageMakerResourceCleanup.deleteResources``.
    """
    existing_endpoint = "my-existing-endpoint-123"
    model = SageMakerModel(
        endpointInstanceType="x1.128xlarge",
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        existingEndpointName=existing_endpoint,
        modelImage="some_image",
        modelPath=S3DataPath("a", "b"),
        modelEnvironmentVariables=None,
        modelExecutionRoleARN="role",
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )

    # The client stored on the model is what the cleanup helper is built from.
    client = model.sagemakerClient
    assert client is not None

    cleanup = SageMakerResourceCleanup(client)
    assert cleanup is not None

    resources = model.getCreatedResources()
    assert resources is not None

    # The test only requires that this call completes without raising.
    cleanup.deleteResources(resources)
    def __init__(
            self,
            trainingInstanceType,
            trainingInstanceCount,
            endpointInstanceType,
            endpointInitialInstanceCount,
            sagemakerRole=IAMRoleFromConfig(),
            requestRowSerializer=ProtobufRequestRowSerializer(),
            responseRowDeserializer=LinearLearnerBinaryClassifierProtobufResponseRowDeserializer(),
            trainingInputS3DataPath=S3AutoCreatePath(),
            trainingOutputS3DataPath=S3AutoCreatePath(),
            trainingInstanceVolumeSizeInGB=1024,
            trainingProjectedColumns=None,
            trainingChannelName="train",
            trainingContentType=None,
            trainingS3DataDistribution="ShardedByS3Key",
            trainingSparkDataFormat="sagemaker",
            trainingSparkDataFormatOptions=None,
            trainingInputMode="File",
            trainingCompressionCodec=None,
            trainingMaxRuntimeInSeconds=24*60*60,
            trainingKmsKeyId=None,
            modelEnvironmentVariables=None,
            endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
            sagemakerClient=SageMakerClients.create_sagemaker_client(),
            region=None,
            s3Client=SageMakerClients.create_s3_default_client(),
            stsClient=SageMakerClients.create_sts_default_client(),
            modelPrependInputRowsToTransformationRows=True,
            deleteStagingDataAfterTraining=True,
            namePolicyFactory=RandomNamePolicyFactory(),
            uid=None,
            javaObject=None):
        """Build the estimator by forwarding every argument to the parent class.

        Before forwarding, ``None`` for ``trainingSparkDataFormatOptions`` or
        ``modelEnvironmentVariables`` is replaced by a new empty dict, and a
        ``None`` ``uid`` is replaced by ``Identifiable._randomUID()``. After the
        parent constructor returns, ``predictor_type`` is given the default
        value ``'binary_classifier'``.

        Default values that are call expressions in the signature (clients,
        role, serializers, S3 paths, name policy factory) are evaluated once,
        when the method is defined, and are shared by all calls that omit them.
        """
        # NOTE: do not bind any additional local name above the locals()
        # snapshot below -- every local present at that point is forwarded to
        # the parent constructor as a keyword argument.
        if uid is None:
            uid = Identifiable._randomUID()

        if modelEnvironmentVariables is None:
            modelEnvironmentVariables = {}

        if trainingSparkDataFormatOptions is None:
            trainingSparkDataFormatOptions = {}

        # Snapshot the (normalised) arguments; 'self' is passed implicitly.
        kwargs = dict(locals())
        kwargs.pop('self')

        # Explicit two-argument super() is kept on purpose: the zero-argument
        # form would add '__class__' to locals() and therefore to kwargs.
        super(LinearLearnerBinaryClassifier, self).__init__(**kwargs)

        self._setDefault(predictor_type='binary_classifier')
Ejemplo n.º 4
0
    def fromEndpoint(
            cls,
            endpointName,
            requestRowSerializer,
            responseRowDeserializer,
            modelEnvironmentVariables=None,
            sagemakerClient=SageMakerClients.create_sagemaker_client(),
            prependResultRows=True,
            namePolicy=RandomNamePolicy(),
            uid="sagemaker"):
        """ Creates a JavaSageMakerModel bound to an already existing endpoint.

        Delegates to the ``fromEndpoint`` function of the wrapped Scala class
        (``SageMakerModel._wrapped_class``). The returned JavaSageMakerModel can be used to
        transform Dataframes.

        Note:
            ``cls`` is not used by the body; presumably this function is bound with a
            ``@classmethod`` decorator located outside this excerpt -- confirm.
            The default ``sagemakerClient`` and ``namePolicy`` objects are created once, when
            the function is defined, and are shared by every call that omits them.

        Args:
            endpointName (str): The name of an endpoint that is currently in service.
            requestRowSerializer (RequestRowSerializer): Serializes a row to an array of bytes.
            responseRowDeserializer (ResponseRowDeserializer): Deserializes an array of bytes to a
                series of rows.
            modelEnvironmentVariables (dict): The environment variables that SageMaker will set on
                the model container during execution. ``None`` is replaced by an empty dict.
            sagemakerClient (AmazonSageMaker): Amazon SageMaker client, forwarded unchanged to
                the Scala function.
            prependResultRows (bool): Whether the transformation result should also include the
                input Rows. If true, each output Row is formed by a concatenation of the input Row
                with the corresponding Row produced by SageMaker invocation, produced by
                responseRowDeserializer. If false, each output Row is just taken from
                responseRowDeserializer.
            namePolicy (NamePolicy): The NamePolicy to use when naming SageMaker entities created
                during usage of the returned model.
            uid (str): The unique identifier of the SageMakerModel. Used to represent the stage
                in Spark ML pipelines.

        Returns:
            JavaSageMakerModel:
                The object produced by the Scala ``fromEndpoint`` call; it sends InvokeEndpoint
                requests to the named endpoint.

        """

        # Fully qualified name of the Scala factory function to invoke.
        scala_function = "%s.%s" % (SageMakerModel._wrapped_class, "fromEndpoint")

        # Substitute an empty mapping for None before handing it to the Scala side.
        environment = {} if modelEnvironmentVariables is None else modelEnvironmentVariables

        wrapper = SageMakerJavaWrapper()
        return wrapper._new_java_obj(
            scala_function,
            endpointName,
            requestRowSerializer,
            responseRowDeserializer,
            environment,
            sagemakerClient,
            prependResultRows,
            namePolicy,
            uid)
Ejemplo n.º 5
0
    def __init__(self,
                 endpointInstanceType,
                 endpointInitialInstanceCount,
                 requestRowSerializer,
                 responseRowDeserializer,
                 existingEndpointName=None,
                 modelImage=None,
                 modelPath=None,
                 modelEnvironmentVariables=None,
                 modelExecutionRoleARN=None,
                 endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
                 sagemakerClient=SageMakerClients.create_sagemaker_client(),
                 prependResultRows=True,
                 namePolicy=RandomNamePolicy(),
                 uid=None,
                 javaObject=None):
        """Wrap an existing Java object, or construct a new one from the arguments.

        When ``javaObject`` is truthy it is stored as the wrapped object and all
        other arguments are ignored. Otherwise a new instance of
        ``SageMakerModel._wrapped_class`` is created: ``None`` for
        ``modelEnvironmentVariables`` becomes an empty dict, ``None`` for
        ``uid`` becomes ``Identifiable._randomUID()``, and the endpoint
        instance type, endpoint instance count, existing endpoint name, model
        image, model path and execution role ARN are each wrapped in
        ``Option(...)`` before being passed on.

        In both cases the Python-side uid is then reset to the value returned by
        the wrapped object's ``uid``.

        The default ``sagemakerClient`` and ``namePolicy`` objects are created
        once, when the method is defined, and are shared by all calls that omit
        them.
        """

        super(SageMakerModel, self).__init__()

        if javaObject:
            wrapped = javaObject
        else:
            # Normalise the optional arguments only on the path that uses them.
            environment = {} if modelEnvironmentVariables is None else modelEnvironmentVariables
            stage_uid = Identifiable._randomUID() if uid is None else uid

            wrapped = self._new_java_obj(
                SageMakerModel._wrapped_class,
                Option(endpointInstanceType),
                Option(endpointInitialInstanceCount),
                requestRowSerializer,
                responseRowDeserializer,
                Option(existingEndpointName),
                Option(modelImage),
                Option(modelPath),
                environment,
                Option(modelExecutionRoleARN),
                endpointCreationPolicy,
                sagemakerClient,
                prependResultRows,
                namePolicy,
                stage_uid
            )

        self._java_obj = wrapped
        # Keep the Python uid in sync with whatever the wrapped object reports.
        self._resetUid(self._call_java("uid"))
def test_linearLearnerBinaryClassifier_passes_correct_params_to_scala():
    """Verify that LinearLearnerBinaryClassifier exposes the values it was built with.

    Constructs the estimator with explicit training/endpoint settings and S3
    paths, then checks the corresponding attributes on the estimator. The KMS
    key id is not supplied and is expected to read back as ``None``.
    """
    train_type = "c4.8xlarge"
    train_count = 3
    serve_type = "c4.8xlarge"
    serve_count = 3

    bucket = "random-bucket"
    prefix_in = "linear-learner-binary-classifier-training"
    prefix_out = "linear-learner-binary-classifier-out"
    role_arn = "arn:aws:iam::123456789:role/SageMakerRole"

    estimator = LinearLearnerBinaryClassifier(
        trainingInstanceType=train_type,
        trainingInstanceCount=train_count,
        endpointInstanceType=serve_type,
        endpointInitialInstanceCount=serve_count,
        sagemakerRole=IAMRole(role_arn),
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=(
            LinearLearnerBinaryClassifierProtobufResponseRowDeserializer()),
        trainingInstanceVolumeSizeInGB=2048,
        trainingInputS3DataPath=S3DataPath(bucket, prefix_in),
        trainingOutputS3DataPath=S3DataPath(bucket, prefix_out),
        trainingMaxRuntimeInSeconds=1,
        endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_TRANSFORM,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        s3Client=SageMakerClients.create_s3_default_client(),
        stsClient=SageMakerClients.create_sts_default_client(),
        modelPrependInputRowsToTransformationRows=True,
        namePolicyFactory=RandomNamePolicyFactory(),
        uid="sagemaker",
    )

    # Training input location.
    assert estimator.trainingInputS3DataPath.bucket == bucket
    assert estimator.trainingInputS3DataPath.objectPath == prefix_in
    # Instance configuration for training and hosting.
    assert estimator.trainingInstanceCount == train_count
    assert estimator.trainingInstanceType == train_type
    assert estimator.endpointInstanceType == serve_type
    assert estimator.endpointInitialInstanceCount == serve_count
    # Explicit overrides of constructor defaults.
    assert estimator.trainingInstanceVolumeSizeInGB == 2048
    assert estimator.trainingMaxRuntimeInSeconds == 1
    # Not supplied above, so it must read back as None.
    assert estimator.trainingKmsKeyId is None
Ejemplo n.º 7
0
def test_sagemakermodel_can_be_created_from_java_obj():
    """Verify that a SageMakerModel rebuilt from its Java object keeps the same uid.

    Builds a model, converts it with ``_to_java()``, rebuilds a Python model
    with ``SageMakerModel._from_java`` and compares the two ``uid`` values.
    """
    existing_endpoint = "my-existing-endpoint-123"
    original = SageMakerModel(
        endpointInstanceType="x1.128xlarge",
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        existingEndpointName=existing_endpoint,
        modelImage="some_image",
        modelPath=S3DataPath("a", "b"),
        modelEnvironmentVariables=None,
        modelExecutionRoleARN="role",
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )

    # Round-trip through the Java representation.
    java_model = original._to_java()
    rebuilt = SageMakerModel._from_java(java_model)
    assert rebuilt.uid == original.uid
Ejemplo n.º 8
0
    def fromModelS3Path(
            cls,
            modelPath,
            modelImage,
            modelExecutionRoleARN,
            endpointInstanceType,
            endpointInitialInstanceCount,
            requestRowSerializer,
            responseRowDeserializer,
            modelEnvironmentVariables=None,
            endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
            sagemakerClient=SageMakerClients.create_sagemaker_client(),
            prependResultRows=True,
            namePolicy=RandomNamePolicy(),
            uid="sagemaker"):
        """ Creates a JavaSageMakerModel from existing model data in S3.

        Delegates to the ``fromModelS3Path`` function of the wrapped Scala class
        (``SageMakerModel._wrapped_class``). The returned JavaSageMakerModel can be used to
        transform Dataframes.

        Note:
            ``cls`` is not used by the body; presumably this function is bound with a
            ``@classmethod`` decorator located outside this excerpt -- confirm.
            The default ``sagemakerClient`` and ``namePolicy`` objects are created once, when
            the function is defined, and are shared by every call that omits them.

        Args:
            modelPath (str): The S3 URI to the model data to host.
            modelImage (str): The URI of the image that will serve model inferences.
            modelExecutionRoleARN (str): The IAM Role used by SageMaker when running the hosted
                Model and to download model data from S3.
            endpointInstanceType (str): The instance type used to run the model container.
            endpointInitialInstanceCount (int): The initial number of instances used to host the
                model.
            requestRowSerializer (RequestRowSerializer): Serializes a row to an array of bytes.
            responseRowDeserializer (ResponseRowDeserializer): Deserializes an array of bytes to a
                series of rows.
            modelEnvironmentVariables (dict): The environment variables that SageMaker will set on
                the model container during execution. ``None`` is replaced by an empty dict.
            endpointCreationPolicy (EndpointCreationPolicy): Whether the endpoint is created upon
                SageMakerModel construction, transformation, or not at all.
            sagemakerClient (AmazonSageMaker): Amazon SageMaker client. Used to send
                CreateTrainingJob, CreateModel, and CreateEndpoint requests.
            prependResultRows (bool): Whether the transformation result should also include the
                input Rows. If true, each output Row is formed by a concatenation of the input Row
                with the corresponding Row produced by SageMaker invocation, produced by
                responseRowDeserializer. If false, each output Row is just taken from
                responseRowDeserializer.
            namePolicy (NamePolicy): The NamePolicy to use when naming SageMaker entities created
                during usage of the returned model.
            uid (str): The unique identifier of the SageMakerModel. Used to represent the stage
                in Spark ML pipelines.

        Returns:
            JavaSageMakerModel:
                The object produced by the Scala ``fromModelS3Path`` call; it sends
                InvokeEndpoint requests to an endpoint hosting the model.

        """

        # Fully qualified name of the Scala factory function to invoke.
        scala_function = "%s.%s" % (SageMakerModel._wrapped_class, "fromModelS3Path")

        # Substitute an empty mapping for None before handing it to the Scala side.
        environment = {} if modelEnvironmentVariables is None else modelEnvironmentVariables

        wrapper = SageMakerJavaWrapper()
        return wrapper._new_java_obj(
            scala_function,
            modelPath,
            modelImage,
            modelExecutionRoleARN,
            endpointInstanceType,
            endpointInitialInstanceCount,
            requestRowSerializer,
            responseRowDeserializer,
            environment,
            endpointCreationPolicy,
            sagemakerClient,
            prependResultRows,
            namePolicy,
            uid)