def test_sagemakermodel_passes_correct_params_to_scala():
    """Construct a SageMakerModel and assert its attributes echo the constructor arguments."""
    expected_image = "model-abc-123"
    expected_path = S3DataPath("my-bucket", "model-abc-123")
    expected_role = "role-789"
    expected_instance_type = "c4.8xlarge"

    constructor_kwargs = dict(
        endpointInstanceType=expected_instance_type,
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        modelImage=expected_image,
        modelPath=expected_path,
        modelEnvironmentVariables=None,
        modelExecutionRoleARN=expected_role,
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )
    model = SageMakerModel(**constructor_kwargs)

    # Values supplied to the constructor must be readable from the model.
    assert model.modelImage == expected_image
    assert model.modelPath.bucket == expected_path.bucket
    assert model.modelExecutionRoleARN == expected_role
    assert model.endpointInstanceType == expected_instance_type

    # No existing endpoint name was supplied, so none is expected back.
    assert model.existingEndpointName is None
def test_sagemakermodel_can_do_resource_cleanup():
    """Build a SageMakerModel and pass its created resources to SageMakerResourceCleanup."""
    existing_endpoint = "my-existing-endpoint-123"

    constructor_kwargs = dict(
        endpointInstanceType="x1.128xlarge",
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        existingEndpointName=existing_endpoint,
        modelImage="some_image",
        modelPath=S3DataPath("a", "b"),
        modelEnvironmentVariables=None,
        modelExecutionRoleARN="role",
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )
    model = SageMakerModel(**constructor_kwargs)

    # The cleanup helper is built from the client exposed by the model.
    client = model.sagemakerClient
    assert client is not None

    cleanup = SageMakerResourceCleanup(client)
    assert cleanup is not None

    resources = model.getCreatedResources()
    assert resources is not None

    # The test passes as long as this call does not raise.
    cleanup.deleteResources(resources)
def __init__(
        self,
        trainingInstanceType,
        trainingInstanceCount,
        endpointInstanceType,
        endpointInitialInstanceCount,
        sagemakerRole=IAMRoleFromConfig(),
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=LinearLearnerBinaryClassifierProtobufResponseRowDeserializer(),
        trainingInputS3DataPath=S3AutoCreatePath(),
        trainingOutputS3DataPath=S3AutoCreatePath(),
        trainingInstanceVolumeSizeInGB=1024,
        trainingProjectedColumns=None,
        trainingChannelName="train",
        trainingContentType=None,
        trainingS3DataDistribution="ShardedByS3Key",
        trainingSparkDataFormat="sagemaker",
        trainingSparkDataFormatOptions=None,
        trainingInputMode="File",
        trainingCompressionCodec=None,
        trainingMaxRuntimeInSeconds=24*60*60,
        trainingKmsKeyId=None,
        modelEnvironmentVariables=None,
        endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        region=None,
        s3Client=SageMakerClients.create_s3_default_client(),
        stsClient=SageMakerClients.create_sts_default_client(),
        modelPrependInputRowsToTransformationRows=True,
        deleteStagingDataAfterTraining=True,
        namePolicyFactory=RandomNamePolicyFactory(),
        uid=None,
        javaObject=None):
    """Initialize the estimator by forwarding every argument to the parent class.

    ``None`` values for ``trainingSparkDataFormatOptions`` and
    ``modelEnvironmentVariables`` are replaced with a fresh empty dict, and a
    ``None`` ``uid`` is replaced with ``Identifiable._randomUID()``. All
    arguments except ``self`` are then passed by keyword to the parent
    ``__init__``, after which ``predictor_type`` is given the default value
    ``'binary_classifier'``.
    """
    # Normalize the None placeholders in place. The parameter names themselves
    # must be rebound because they are captured by the locals() snapshot below.
    if uid is None:
        uid = Identifiable._randomUID()
    if modelEnvironmentVariables is None:
        modelEnvironmentVariables = {}
    if trainingSparkDataFormatOptions is None:
        trainingSparkDataFormatOptions = {}

    # Snapshot the arguments. Do not introduce any other local name above this
    # line: it would be forwarded to the parent as an unexpected keyword.
    forwarded = dict(locals())
    forwarded.pop('self')
    super(LinearLearnerBinaryClassifier, self).__init__(**forwarded)

    self._setDefault(predictor_type='binary_classifier')
def fromEndpoint(
        cls,
        endpointName,
        requestRowSerializer,
        responseRowDeserializer,
        modelEnvironmentVariables=None,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=True,
        namePolicy=RandomNamePolicy(),
        uid="sagemaker"):
    """ Creates a JavaSageMakerModel for an existing endpoint.

    Calls the ``fromEndpoint`` factory of the wrapped class. The returned
    JavaSageMakerModel can be used to transform Dataframes.

    Args:
        endpointName (str): The name of an endpoint that is currently in service.
        requestRowSerializer (RequestRowSerializer): Serializes a row to an array of bytes.
        responseRowDeserializer (ResponseRowDeserializer): Deserializes an array of bytes to a
            series of rows.
        modelEnvironmentVariables: The environment variables that SageMaker will set on the
            model container during execution. ``None`` is passed on as an empty dict.
        sagemakerClient (AmazonSageMaker): Amazon SageMaker client. Used to send
            CreateTrainingJob, CreateModel, and CreateEndpoint requests.
        prependResultRows (bool): Whether the transformation result should also include the
            input Rows. If true, each output Row is formed by a concatenation of the input Row
            with the corresponding Row produced by SageMaker invocation, produced by
            responseRowDeserializer. If false, each output Row is just taken from
            responseRowDeserializer.
        namePolicy (NamePolicy): The NamePolicy to use when naming SageMaker entities created
            during usage of the returned model.
        uid (String): The unique identifier of the SageMakerModel. Used to represent the stage
            in Spark ML pipelines.

    Returns:
        JavaSageMakerModel: A JavaSageMakerModel that sends InvokeEndpoint requests to the
            named endpoint.
    """
    env_vars = {} if modelEnvironmentVariables is None else modelEnvironmentVariables

    # The factory is addressed by name on the wrapped (JVM-side) class.
    factory_name = "%s.fromEndpoint" % SageMakerModel._wrapped_class
    factory_args = (
        endpointName,
        requestRowSerializer,
        responseRowDeserializer,
        env_vars,
        sagemakerClient,
        prependResultRows,
        namePolicy,
        uid,
    )
    return SageMakerJavaWrapper()._new_java_obj(factory_name, *factory_args)
def __init__(self,
             endpointInstanceType,
             endpointInitialInstanceCount,
             requestRowSerializer,
             responseRowDeserializer,
             existingEndpointName=None,
             modelImage=None,
             modelPath=None,
             modelEnvironmentVariables=None,
             modelExecutionRoleARN=None,
             endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
             sagemakerClient=SageMakerClients.create_sagemaker_client(),
             prependResultRows=True,
             namePolicy=RandomNamePolicy(),
             uid=None,
             javaObject=None):
    """Wrap an existing Java object, or construct a new one from the arguments.

    When ``javaObject`` is truthy it is adopted as-is and the remaining
    construction arguments are not used. Otherwise a new Java object of
    ``SageMakerModel._wrapped_class`` is created; a ``None`` ``uid`` is
    replaced by ``Identifiable._randomUID()`` and a ``None``
    ``modelEnvironmentVariables`` by an empty dict. The endpoint instance type
    and count, existing endpoint name, model image, model path and execution
    role are each passed wrapped in ``Option``.

    On either path the Python-side uid is finally reset to the value returned
    by the Java object's ``uid`` call.
    """
    super(SageMakerModel, self).__init__()

    env_vars = {} if modelEnvironmentVariables is None else modelEnvironmentVariables

    java_obj = javaObject
    if not java_obj:
        stage_uid = Identifiable._randomUID() if uid is None else uid
        constructor_args = (
            Option(endpointInstanceType),
            Option(endpointInitialInstanceCount),
            requestRowSerializer,
            responseRowDeserializer,
            Option(existingEndpointName),
            Option(modelImage),
            Option(modelPath),
            env_vars,
            Option(modelExecutionRoleARN),
            endpointCreationPolicy,
            sagemakerClient,
            prependResultRows,
            namePolicy,
            stage_uid,
        )
        java_obj = self._new_java_obj(SageMakerModel._wrapped_class, *constructor_args)
    self._java_obj = java_obj

    # Keep the Python uid in sync with the Java object, whichever path was taken.
    self._resetUid(self._call_java("uid"))
def test_linearLearnerBinaryClassifier_passes_correct_params_to_scala():
    """Construct a LinearLearnerBinaryClassifier and assert its attributes echo the arguments."""
    train_type = "c4.8xlarge"
    train_count = 3
    endpoint_type = "c4.8xlarge"
    endpoint_count = 3
    bucket = "random-bucket"
    input_key_prefix = "linear-learner-binary-classifier-training"
    output_key_prefix = "linear-learner-binary-classifier-out"
    role_arn = "arn:aws:iam::123456789:role/SageMakerRole"

    constructor_kwargs = dict(
        trainingInstanceType=train_type,
        trainingInstanceCount=train_count,
        endpointInstanceType=endpoint_type,
        endpointInitialInstanceCount=endpoint_count,
        sagemakerRole=IAMRole(role_arn),
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=LinearLearnerBinaryClassifierProtobufResponseRowDeserializer(),
        trainingInstanceVolumeSizeInGB=2048,
        trainingInputS3DataPath=S3DataPath(bucket, input_key_prefix),
        trainingOutputS3DataPath=S3DataPath(bucket, output_key_prefix),
        trainingMaxRuntimeInSeconds=1,
        endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_TRANSFORM,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        s3Client=SageMakerClients.create_s3_default_client(),
        stsClient=SageMakerClients.create_sts_default_client(),
        modelPrependInputRowsToTransformationRows=True,
        namePolicyFactory=RandomNamePolicyFactory(),
        uid="sagemaker",
    )
    estimator = LinearLearnerBinaryClassifier(**constructor_kwargs)

    # Training input location.
    assert estimator.trainingInputS3DataPath.bucket == bucket
    assert estimator.trainingInputS3DataPath.objectPath == input_key_prefix

    # Instance configuration.
    assert estimator.trainingInstanceCount == train_count
    assert estimator.trainingInstanceType == train_type
    assert estimator.endpointInstanceType == endpoint_type
    assert estimator.endpointInitialInstanceCount == endpoint_count

    # Explicit overrides, plus one argument that was left unset.
    assert estimator.trainingInstanceVolumeSizeInGB == 2048
    assert estimator.trainingMaxRuntimeInSeconds == 1
    assert estimator.trainingKmsKeyId is None
def test_sagemakermodel_can_be_created_from_java_obj():
    """Round-trip a SageMakerModel through _to_java/_from_java and compare uids."""
    existing_endpoint = "my-existing-endpoint-123"

    constructor_kwargs = dict(
        endpointInstanceType="x1.128xlarge",
        endpointInitialInstanceCount=2,
        requestRowSerializer=ProtobufRequestRowSerializer(),
        responseRowDeserializer=KMeansProtobufResponseRowDeserializer(),
        existingEndpointName=existing_endpoint,
        modelImage="some_image",
        modelPath=S3DataPath("a", "b"),
        modelEnvironmentVariables=None,
        modelExecutionRoleARN="role",
        endpointCreationPolicy=EndpointCreationPolicy.DO_NOT_CREATE,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=False,
        namePolicy=None,
        uid="uid",
    )
    original = SageMakerModel(**constructor_kwargs)

    # A model rebuilt from the Java object must report the same uid.
    java_side = original._to_java()
    rebuilt = SageMakerModel._from_java(java_side)
    assert rebuilt.uid == original.uid
def fromModelS3Path(
        cls,
        modelPath,
        modelImage,
        modelExecutionRoleARN,
        endpointInstanceType,
        endpointInitialInstanceCount,
        requestRowSerializer,
        responseRowDeserializer,
        modelEnvironmentVariables=None,
        endpointCreationPolicy=EndpointCreationPolicy.CREATE_ON_CONSTRUCT,
        sagemakerClient=SageMakerClients.create_sagemaker_client(),
        prependResultRows=True,
        namePolicy=RandomNamePolicy(),
        uid="sagemaker"):
    """ Creates a JavaSageMakerModel from existing model data in S3.

    Calls the ``fromModelS3Path`` factory of the wrapped class. The returned
    JavaSageMakerModel can be used to transform Dataframes.

    Args:
        modelPath (str): The S3 URI to the model data to host.
        modelImage (str): The URI of the image that will serve model inferences.
        modelExecutionRoleARN (str): The IAM Role used by SageMaker when running the hosted
            Model and to download model data from S3.
        endpointInstanceType (str): The instance type used to run the model container.
        endpointInitialInstanceCount (int): The initial number of instances used to host the
            model.
        requestRowSerializer (RequestRowSerializer): Serializes a row to an array of bytes.
        responseRowDeserializer (ResponseRowDeserializer): Deserializes an array of bytes to a
            series of rows.
        modelEnvironmentVariables: The environment variables that SageMaker will set on the
            model container during execution. ``None`` is passed on as an empty dict.
        endpointCreationPolicy (EndpointCreationPolicy): Whether the endpoint is created upon
            SageMakerModel construction, transformation, or not at all.
        sagemakerClient (AmazonSageMaker): Amazon SageMaker client. Used to send
            CreateTrainingJob, CreateModel, and CreateEndpoint requests.
        prependResultRows (bool): Whether the transformation result should also include the
            input Rows. If true, each output Row is formed by a concatenation of the input Row
            with the corresponding Row produced by SageMaker invocation, produced by
            responseRowDeserializer. If false, each output Row is just taken from
            responseRowDeserializer.
        namePolicy (NamePolicy): The NamePolicy to use when naming SageMaker entities created
            during usage of the returned model.
        uid (String): The unique identifier of the SageMakerModel. Used to represent the stage
            in Spark ML pipelines.

    Returns:
        JavaSageMakerModel: A JavaSageMakerModel that sends InvokeEndpoint requests to an
            endpoint hosting the training job's model.
    """
    env_vars = {} if modelEnvironmentVariables is None else modelEnvironmentVariables

    # The factory is addressed by name on the wrapped (JVM-side) class.
    factory_name = "%s.fromModelS3Path" % SageMakerModel._wrapped_class
    factory_args = (
        modelPath,
        modelImage,
        modelExecutionRoleARN,
        endpointInstanceType,
        endpointInitialInstanceCount,
        requestRowSerializer,
        responseRowDeserializer,
        env_vars,
        endpointCreationPolicy,
        sagemakerClient,
        prependResultRows,
        namePolicy,
        uid,
    )
    return SageMakerJavaWrapper()._new_java_obj(factory_name, *factory_args)