Example #1
def sagemaker_model(image: str,
                    hyperparams: dict,
                    role: str,
                    output_dir: str,
                    region_name: str = 'us-east-1',
                    instance_type: str = 'ml.m4.xlarge'):
    """

    :param output_dir:
    :param image:
    :param hyperparams:
    :param role:
    :param instance_type:
    :param region_name:
    :return:
    """
    if image == 'xgboost':
        input_mode = 'File'
        container = get_image_uri(region_name, image, '0.90-2')
    else:
        input_mode = 'Pipe'
        container = get_image_uri(region_name, image)
    model = sagemaker.estimator.Estimator(container,
                                          role=role,
                                          input_mode=input_mode,
                                          train_instance_count=1,
                                          output_path=output_dir,
                                          train_instance_type=instance_type,
                                          train_use_spot_instances=True,
                                          train_max_run=300,
                                          train_max_wait=600)
    model.set_hyperparameters(**hyperparams)
    return model
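
# A minimal usage sketch for sagemaker_model (not part of the original example).
# The role ARN, S3 paths, and hyperparameters below are placeholders.
def example_sagemaker_model_usage():
    estimator = sagemaker_model(
        image='xgboost',
        hyperparams={'objective': 'binary:logistic', 'num_round': 100},
        role='arn:aws:iam::123456789012:role/SageMakerRole',
        output_dir='s3://example-bucket/xgboost/output')
    estimator.fit({'train': 's3://example-bucket/xgboost/train'})
    return estimator
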
def test_get_xgboost_image_uri_throws_error_for_unsupported_version():
    with pytest.raises(ValueError) as error:
        get_image_uri(REGION, "xgboost", "99.99-9")
    assert "SageMaker XGBoost version 99.99-9 is not supported" in str(error)

    with pytest.raises(ValueError) as error:
        get_image_uri(REGION, "xgboost", "0.90-1-gpu-py3")
    assert "SageMaker XGBoost version 0.90-1-gpu-py3 is not supported" in str(
        error)
def test_get_xgboost_image_uri():
    legacy_xgb_image_uri = get_image_uri(REGION, "xgboost")
    assert legacy_xgb_image_uri == "433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1"

    updated_xgb_image_uri = get_image_uri(REGION, "xgboost", "0.90-1")
    assert (
        updated_xgb_image_uri ==
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3"
    )
def test_gov_ecr_uri():
    assert (
        get_image_uri("us-gov-west-1", "kmeans", "latest")
        == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/kmeans:latest"
    )

    assert (
        get_image_uri("us-iso-east-1", "kmeans", "latest")
        == "490574956308.dkr.ecr.us-iso-east-1.c2s.ic.gov/kmeans:latest"
    )
Example #5
def test_inference_pipeline_model_deploy_with_update_endpoint(
    sagemaker_session, cpu_instance_type, alternative_cpu_instance_type
):
    sparkml_data_path = os.path.join(DATA_DIR, "sparkml_model")
    xgboost_data_path = os.path.join(DATA_DIR, "xgboost_model")
    endpoint_name = "test-inference-pipeline-deploy-{}".format(sagemaker_timestamp())
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, "mleap_model.tar.gz"),
        key_prefix="integ-test-data/sparkml/model",
    )
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(
            model_data=sparkml_model_data,
            env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
            sagemaker_session=sagemaker_session,
        )
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, "xgboost")
        xgb_model = Model(
            model_data=xgb_model_data, image=xgb_image, sagemaker_session=sagemaker_session
        )
        model = PipelineModel(
            models=[sparkml_model, xgb_model],
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        model.deploy(1, alternative_cpu_instance_type, endpoint_name=endpoint_name)
        old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        old_config_name = old_endpoint["EndpointConfigName"]

        model.deploy(1, cpu_instance_type, update_endpoint=True, endpoint_name=endpoint_name)

        # Wait for endpoint to finish updating
        # Endpoint update takes ~7min. 40 retries * 30s sleeps = 20min timeout
        for _ in retries(40, "Waiting for 'InService' endpoint status", seconds_to_sleep=30):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=endpoint_name
            )
            if new_endpoint["EndpointStatus"] == "InService":
                break

        new_config_name = new_endpoint["EndpointConfigName"]
        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )

        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1

    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
    # The assertion must be outside the `with` block, otherwise it never runs.
    assert "Could not find model" in str(exception.value)
Example #6
def test_byo_estimator(sagemaker_session, region, cpu_instance_type):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = get_image_uri(region, "factorization-machines")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
Example #7
def trained_estimator_from_hyperparams(s3_train_data,
                                       hyperparams,
                                       output_path,
                                       s3_test_data=None):
    """
    Create an Estimator from the given hyperparams, fit to training data, 
    and return a deployed predictor
    
    """
    # set up the estimator
    knn = sagemaker.estimator.Estimator(
        get_image_uri(boto3.Session().region_name, "knn"),
        role,  # COMMENTED OUT get_execution_role() and replaced with the created role
        train_instance_count=1,
        train_instance_type='ml.m5.2xlarge',
        output_path=output_path,
        sagemaker_session=sagemaker.Session())
    knn.set_hyperparameters(**hyperparams)

    # train a model. fit_input contains the locations of the train and test data
    fit_input = {'train': s3_train_data}
    if s3_test_data is not None:
        fit_input['test'] = s3_test_data
    knn.fit(fit_input)
    return knn
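
# Hedged usage sketch for trained_estimator_from_hyperparams (not part of the
# original example). It assumes the module-level `role` used by the helper is
# defined; the S3 paths and k-NN hyperparameters are placeholders.
def example_knn_training():
    hyperparams = {
        'feature_dim': 784,            # dimensionality of each input vector
        'k': 10,                       # number of nearest neighbours
        'sample_size': 5000,           # samples used to build the index
        'predictor_type': 'classifier',
    }
    return trained_estimator_from_hyperparams(
        s3_train_data='s3://example-bucket/knn/train',
        hyperparams=hyperparams,
        output_path='s3://example-bucket/knn/output',
        s3_test_data='s3://example-bucket/knn/test')
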
def sagemakerTrain():
    try:
        # get the ARN of the executing role (to pass to Sagemaker for training)
        role = 'arn:aws:iam::056149205531:role/service-role/AmazonSageMaker-ExecutionRole-20180112T102983'
        s3_train_data = 's3://{}/train/{}'.format(bucket, dataset)
        container = get_image_uri(boto3.Session().region_name,
                                  'linear-learner')

        session = sagemaker.Session()

        # set up the training params
        linear = sagemaker.estimator.Estimator(
            container,
            role,
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            output_path=output_location,
            sagemaker_session=session)

        # set up the hyperparameters
        linear.set_hyperparameters(feature_dim=13,
                                   predictor_type='regressor',
                                   epochs=10,
                                   loss='absolute_loss',
                                   optimizer='adam',
                                   mini_batch_size=200)

        linear.fit({'train': s3_train_data}, wait=False)

    except Exception as err:
        logger.error(
            "Error while launching SageMaker training: {}".format(err))
def trainModel():

	sess = sagemaker.Session()
	container = get_image_uri(region, 'xgboost')
	
	YColumns = ['result']
	numericalCols = ['guarantee_percentage', 'container_id_label']
	categoricalCols = [ 'component_name', 'slot_names', 'container_type', 'component_namespace',
						'component_display_name', 'customer_targeting', 'site']

	columns_to_keep = YColumns + numericalCols + categoricalCols

	output_path_str = 's3://{}/{}/sagemaker-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
	xgb = sagemaker.estimator.Estimator(container, role, 
                                    train_instance_count=1, 
                                    train_instance_type='ml.m4.xlarge',
                                    output_path=output_path_str.format(input_bucket, 'results'),
                                    sagemaker_session=sess)

	xgb.set_hyperparameters(objective='multi:softmax',
	                        colsample_bytree=0.3,
	                        learning_rate=0.3,
	                        max_depth=16,
	                        alpha=5,
	                        num_class=6,
	                        n_estimators=200,
	                        num_round=200)

	input_prefix = 'inputs'
	s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/{}'.format(input_bucket, input_prefix, s3_training_file), content_type='csv')
	s3_input_validation = sagemaker.s3_input(s3_data='s3://{}/{}/{}'.format(input_bucket, input_prefix, s3_training_file), content_type='csv')
	
	xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})
	saveModel(xgb, columns_to_keep)
	return
def test_tf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        tf = TensorFlow(
            image_name=get_image_uri(
                sagemaker_session.boto_session.region_name, "factorization-machines"
            ),
            entry_point=SCRIPT,
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            script_mode=True,
            framework_version=TensorFlow.LATEST_VERSION,
            py_version=PYTHON_VERSION,
            metric_definitions=[
                {"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}
            ],
        )
        inputs = tf.sagemaker_session.upload_data(
            path=os.path.join(TF_MNIST_RESOURCE_PATH, "data"), key_prefix="scriptmode/mnist"
        )

        training_config = _build_airflow_workflow_tf(
            estimator=tf, instance_type=cpu_instance_type, inputs=inputs
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
        )
def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
    sagemaker_session, cpu_instance_type
):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

        data_source_location = "test-airflow-config-{}".format(sagemaker_timestamp())
        inputs = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=os.path.join(data_source_location, "train")
        )

        estimator = Estimator(
            image_name=get_image_uri(
                sagemaker_session.boto_session.region_name, "factorization-machines"
            ),
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        training_config = _build_airflow_workflow(
            estimator=estimator, instance_type=cpu_instance_type, inputs=inputs
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"],
        )
Example #12
def make_estimator(job_name,
                   s3_output,
                   input_mode='Pipe',
                   train_instance_count=1,
                   train_instance_type='ml.p2.xlarge',
                   train_volume_size=30,
                   train_max_run=360000):

    role = get_execution_role()
    sess = sagemaker.Session()
    training_image = get_image_uri(sess.boto_region_name,
                                   'image-classification',
                                   repo_version="latest")

    estimator = sagemaker.estimator.Estimator(
        training_image,
        role,
        train_instance_count=train_instance_count,
        train_instance_type=train_instance_type,
        train_volume_size=train_volume_size,
        train_max_run=train_max_run,
        input_mode=input_mode,
        output_path=s3_output,
        sagemaker_session=sess,
        base_job_name=job_name)

    return estimator
Example #13
def test_inference_pipeline_batch_transform(sagemaker_session):
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(SPARKML_DATA_PATH, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model')
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model')
    batch_job_name = 'test-inference-pipeline-batch-{}'.format(
        sagemaker_timestamp())
    sparkml_model = SparkMLModel(model_data=sparkml_model_data,
                                 env={'SAGEMAKER_SPARKML_SCHEMA': SCHEMA},
                                 sagemaker_session=sagemaker_session)
    xgb_image = get_image_uri(sagemaker_session.boto_region_name, 'xgboost')
    xgb_model = Model(model_data=xgb_model_data,
                      image=xgb_image,
                      sagemaker_session=sagemaker_session)
    model = PipelineModel(models=[sparkml_model, xgb_model],
                          role='SageMakerRole',
                          sagemaker_session=sagemaker_session,
                          name=batch_job_name)
    transformer = model.transformer(1, 'ml.m4.xlarge')
    transform_input_key_prefix = 'integ-test-data/sparkml_xgboost/transform'
    transform_input = transformer.sagemaker_session.upload_data(
        path=VALID_DATA_PATH, key_prefix=transform_input_key_prefix)

    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.transform(transform_input,
                              content_type=CONTENT_TYPE_CSV,
                              job_name=batch_job_name)
        transformer.wait()
Example #14
def create_blaxing_text_model(
        region_name: str,
        sm_session: Session,
        sm_role: str,
        s3_input_url: str,
        s3_output_url: str):
    """
    Create a BlazingText model.

    Args:
        - region_name: AWS Region Name to use SageMaker in.
        - sm_session: SageMaker Session Object.
        - sm_role: SageMaker role arn that allows SM to connect to s3.
        - s3_input_url: training data input path on s3
        - s3_output_url: model artifacts output path

    Return:
        - bt_model: instance of Estimator, can be used to deploy an inference endpoint
    """
    # define container
    container = get_image_uri(region_name, "blazingtext", "latest")

    # create estimator
    bt_model = Estimator(container,
                         sm_role,
                         train_instance_count=1,
                         train_instance_type='ml.c4.2xlarge',
                         train_volume_size=30,
                         train_max_run=360000,
                         input_mode='File',
                         output_path=s3_output_url,
                         sagemaker_session=sm_session)

    # set hyperparameters
    bt_model.set_hyperparameters(mode="skipgram",
                                 epochs=5,
                                 min_count=5,
                                 sampling_threshold=0.0001,
                                 learning_rate=0.05,
                                 window_size=5,
                                 vector_dim=100,
                                 negative_samples=5,
                                 subwords=True,
                                 min_char=3,
                                 max_char=6,
                                 batch_size=11,
                                 evaluation=True)

    # define data channels
    train_data = s3_input(s3_input_url, distribution='FullyReplicated',
                          content_type='text/plain', s3_data_type='S3Prefix')
    data_channels = {'train': train_data}

    # fit model
    bt_model.fit(inputs=data_channels, logs=True)

    return bt_model
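
# Hedged follow-up sketch (not part of the original example): deploying the
# estimator returned by create_blaxing_text_model as a real-time endpoint.
# The instance type and count are assumptions; remember to delete the endpoint
# when finished to avoid charges.
def example_deploy_blazingtext(bt_model):
    word_vectors = bt_model.deploy(initial_instance_count=1,
                                   instance_type='ml.m4.xlarge')
    return word_vectors
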
Example #15
def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type):
    image_name = get_image_uri(region, "factorization-machines")
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1,
                                 cpu_instance_type,
                                 endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.train_image() == image_name
Example #16
    def _do_training(self):
        self._logger.info('Training data is located in: {}'.format(
            self._data_s3_url))
        self._logger.info('Artifacts will be located in: {}'.format(
            self._output_location))

        self._job_name = 'kmeans-batch-training-' + strftime(
            "%Y-%m-%d-%H-%M-%S", gmtime())
        image = get_image_uri(boto3.Session().region_name, 'kmeans')

        create_training_params = \
            {
                "AlgorithmSpecification": {
                    "TrainingImage": image,
                    "TrainingInputMode": "File"
                },
                "RoleArn": self._ml_engine.iam_role,
                "OutputDataConfig": {
                    "S3OutputPath": self._output_location
                },
                "ResourceConfig": {
                    "InstanceCount": self._instance_count,
                    "InstanceType": self._instance_type,
                    "VolumeSizeInGB": self._volume_size_in_gb
                },
                "TrainingJobName": self._job_name,
                "HyperParameters": {
                    "k": str(self._hyper_parameter_k),
                    "epochs": str(self._epochs),
                    "feature_dim": str(self._num_features),
                    "mini_batch_size": str(self._mini_batch_size),
                    "force_dense": "True"
                },
                "StoppingCondition": {
                    "MaxRuntimeInSeconds": self._max_runtime_in_seconds
                },
                "InputDataConfig": [
                    {
                        "ChannelName": "train",
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": self._data_s3_url,
                                "S3DataDistributionType": "FullyReplicated"
                            }
                        },
                        "CompressionType": "None",
                        "RecordWrapperType": "None"
                    }
                ]
            }

        self._logger.info("Creating training job ... {}".format(
            self._job_name))
        self._sagemaker_client.create_training_job(**create_training_params)
    def getcontainer(self, region):
        """
        xgboost specific code goes here to set up the training container
        :param region:
        :return:
        """
        from sagemaker.amazon.amazon_estimator import get_image_uri

        container = get_image_uri(region, 'xgboost')

        return container
def create_model(image: str, hyperparameters: dict, instance_type: str,
                 output_path: str, region_name: str, role: str, s3_train: str,
                 s3_validation: str, job_name: str):
    if image == 'xgboost':
        container = get_image_uri(region_name, image, '0.90-2')
    else:
        container = get_image_uri(region_name, image)
    save_interval = '1'
    model = sagemaker.estimator.Estimator(
        container,
        role=role,
        train_instance_count=1,
        train_instance_type=instance_type,
        train_use_spot_instances=True,
        train_max_run=300,
        train_max_wait=600,
        output_path=output_path,
        debugger_hook_config=DebuggerHookConfig(
            s3_output_path=f's3://{bucket}/{prefix}/debug',
            collection_configs=[
                CollectionConfig(name='metrics',
                                 parameters={'save_interval': save_interval}),
                CollectionConfig(name='feature_importance',
                                 parameters={'save_interval': save_interval}),
                CollectionConfig(name='full_shap',
                                 parameters={'save_interval': save_interval}),
                CollectionConfig(name='average_shap',
                                 parameters={'save_interval': save_interval})
            ]),
        rules=[
            Rule.sagemaker(rule_configs.class_imbalance(),
                           rule_parameters={'collection_names': 'metrics'})
        ])
    model.set_hyperparameters(**hyperparameters)
    data_channel = {
        'train': s3_input(s3_train, content_type='text/csv'),
        'validation': s3_input(s3_validation, content_type='text/csv')
    }
    model.fit(data_channel, job_name=job_name)
    return model
Example #19
def test_inference_pipeline_model_deploy(sagemaker_session, cpu_instance_type):
    sparkml_data_path = os.path.join(DATA_DIR, "sparkml_model")
    xgboost_data_path = os.path.join(DATA_DIR, "xgboost_model")
    endpoint_name = "test-inference-pipeline-deploy-{}".format(
        sagemaker_timestamp())
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, "mleap_model.tar.gz"),
        key_prefix="integ-test-data/sparkml/model",
    )
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(
            model_data=sparkml_model_data,
            env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
            sagemaker_session=sagemaker_session,
        )
        xgb_image = get_image_uri(sagemaker_session.boto_region_name,
                                  "xgboost")
        xgb_model = Model(model_data=xgb_model_data,
                          image=xgb_image,
                          sagemaker_session=sagemaker_session)
        model = PipelineModel(
            models=[sparkml_model, xgb_model],
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
            name=endpoint_name,
        )
        model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=json_serializer,
            content_type=CONTENT_TYPE_CSV,
            accept=CONTENT_TYPE_CSV,
        )

        with open(VALID_DATA_PATH, "r") as f:
            valid_data = f.read()
            assert predictor.predict(valid_data) == "0.714013934135"

        with open(INVALID_DATA_PATH, "r") as f:
            invalid_data = f.read()
            assert predictor.predict(invalid_data) is None

    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
    # The assertion must be outside the `with` block, otherwise it never runs.
    assert "Could not find model" in str(exception.value)
Example #20
def inference_pipeline_ep(role, sess, spark_model_uri, region, bucket,
                          pipeline_model_name, endpoint_name, **context):
    timestamp_prefix = Variable.get("timestamp")
    # sm = boto3.client('sagemaker', region_name=region)
    s3client = boto3.client('s3', region_name=region)

    s3_sparkml_data_uri = spark_model_uri

    # Using S3 calls for listing model artifacts
    s3_xgb_objects = s3client.list_objects_v2(
        Bucket=bucket, StartAfter='sagemaker/spark-preprocess/model/xgboost/')
    obj_list = s3_xgb_objects['Contents']
    obj_list.sort(key=lambda x: x['LastModified'], reverse=False)
    xgboost_model_latest = obj_list[-1]['Key']
    s3_xgboost_model_uri = 's3://' + bucket + '/' + xgboost_model_latest

    # AirFlow XCOM feature
    # s3_xgboost_model_uri = context['task_instance'].xcom_pull(
    #    task_ids='xgboost_model_training')['Training']['ModelArtifacts']['S3ModelArtifacts']

    xgb_container = get_image_uri(sess.region_name,
                                  'xgboost',
                                  repo_version='0.90-1')

    schema_json = schema_utils.abalone_schema()

    sparkml_model = SparkMLModel(
        model_data=s3_sparkml_data_uri,
        role=role,
        sagemaker_session=sagemaker.session.Session(sess),
        env={'SAGEMAKER_SPARKML_SCHEMA': schema_json})

    xgb_model = Model(model_data=s3_xgboost_model_uri,
                      role=role,
                      sagemaker_session=sagemaker.session.Session(sess),
                      image=xgb_container)

    pipeline_model_name = pipeline_model_name

    sm_model = PipelineModel(name=pipeline_model_name,
                             role=role,
                             sagemaker_session=sagemaker.session.Session(sess),
                             models=[sparkml_model, xgb_model])

    endpoint_name = endpoint_name

    sm_model.deploy(initial_instance_count=1,
                    instance_type='ml.c4.xlarge',
                    endpoint_name=endpoint_name)
    def __init__(self, name, training_resource_config, region, repo_version):

        self.algo_name = name
        self.training_resource_config = training_resource_config
        self.region = region
        self.repo_version = repo_version

        if self.algo_name == "xgboost":
            self.algo_image_uri = default_framework_uri(
                framework=self.algo_name, region_name=region, image_tag=repo_version
            )
        else:
            self.algo_image_uri = get_image_uri(
                region_name=region, repo_name=self.algo_name, repo_version=repo_version
            )
Example #22
def train_model(s3_model_output_location, s3_training_file_location):

    # create the roles needed for creating endpoints and for using SageMaker
    role = sagemaker_role.create_role_sagemaker()

    # the role-creation call is asynchronous, so wait for it to propagate
    sleep(20)

    # Build Model
    sess = sagemaker.Session()

    # Access appropriate algorithm container image
    #  Specify how many instances to use for distributed training and what type of machine to use
    #  Finally, specify where the trained model artifacts needs to be stored
    #   Reference: http://sagemaker.readthedocs.io/en/latest/estimators.html
    container_path = get_image_uri(boto3.Session().region_name,
                                   'xgboost',
                                   repo_version='0.90-1')

    estimator = sagemaker.estimator.Estimator(
        container_path,
        role,
        train_instance_count=1,
        train_instance_type='ml.m5.large',
        output_path=s3_model_output_location,
        sagemaker_session=sess,
        base_job_name='xgboost-fall-v1')

    # Specify hyperparameters appropriate for the training algorithm
    # XGBoost Training Parameter Reference:
    #   https://github.com/dmlc/xgboost/blob/master/doc/parameter.md

    # max_depth=5,eta=0.1,subsample=0.7,num_round=150
    estimator.set_hyperparameters(max_depth=6,
                                  objective="reg:linear",
                                  eta=0.12,
                                  subsample=0.73,
                                  num_round=200)

    estimator.hyperparameters()

    # content type can be libsvm or csv for XGBoost
    training_input_config = sagemaker.session.s3_input(
        s3_data=s3_training_file_location, content_type="csv")

    estimator.fit({'train': training_input_config})

    return estimator
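
# Hedged follow-up sketch (not part of the original example): deploying the
# fitted estimator returned by train_model() and sending one CSV record for
# inference. The instance type and the sample feature vector are assumptions.
def example_deploy_and_predict(estimator):
    from sagemaker.predictor import csv_serializer

    predictor = estimator.deploy(initial_instance_count=1,
                                 instance_type='ml.m5.large')
    predictor.content_type = 'text/csv'
    predictor.serializer = csv_serializer
    return predictor.predict([0.5, 1.2, 0.7])  # placeholder feature vector
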
Example #23
def test_inference_pipeline_model_deploy(sagemaker_session):
    sparkml_data_path = os.path.join(DATA_DIR, 'sparkml_model')
    xgboost_data_path = os.path.join(DATA_DIR, 'xgboost_model')
    endpoint_name = 'test-inference-pipeline-deploy-{}'.format(
        sagemaker_timestamp())
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model')
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(model_data=sparkml_model_data,
                                     env={'SAGEMAKER_SPARKML_SCHEMA': SCHEMA},
                                     sagemaker_session=sagemaker_session)
        xgb_image = get_image_uri(sagemaker_session.boto_region_name,
                                  'xgboost')
        xgb_model = Model(model_data=xgb_model_data,
                          image=xgb_image,
                          sagemaker_session=sagemaker_session)
        model = PipelineModel(models=[sparkml_model, xgb_model],
                              role='SageMakerRole',
                              sagemaker_session=sagemaker_session,
                              name=endpoint_name)
        model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor = RealTimePredictor(endpoint=endpoint_name,
                                      sagemaker_session=sagemaker_session,
                                      serializer=json_serializer,
                                      content_type=CONTENT_TYPE_CSV,
                                      accept=CONTENT_TYPE_CSV)

        with open(VALID_DATA_PATH, 'r') as f:
            valid_data = f.read()
            assert predictor.predict(valid_data) == '0.714013934135'

        with open(INVALID_DATA_PATH, 'r') as f:
            invalid_data = f.read()
            assert (predictor.predict(invalid_data) is None)

    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
    # The assertion must be outside the `with` block, otherwise it never runs.
    assert 'Could not find model' in str(exception.value)
Example #24
def estimator_knn(sagemaker_session, cpu_instance_type):
    knn_image = get_image_uri(sagemaker_session.boto_session.region_name,
                              "knn",
                              repo_version="1")

    estimator = Estimator(
        image_name=knn_image,
        role=EXECUTION_ROLE,
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
    )

    estimator.set_hyperparameters(k=10,
                                  sample_size=500,
                                  feature_dim=784,
                                  mini_batch_size=100,
                                  predictor_type="regressor")
    return estimator
Example #25
def create_knn():
    role = 'CS218WebApp'
    params = {
        'feature_dim': session['feature_size'],
        'predictor_type': 'classifier',
        'k': session['k'],
        'sample_size': session['sample_size']
    }
    estimator = sagemaker.estimator.Estimator(
        get_image_uri(boto3.Session().region_name, "knn"),
        role=role,
        train_instance_count=1,
        train_instance_type='ml.m5.2xlarge',
        sagemaker_session=sagemaker.Session(),
        hyperparameters=params)

    fit_input = {'train': session['train'], 'test': session['test']}
    estimator.fit(fit_input)
    return estimator
Example #26
def estimator_fm(sagemaker_session, cpu_instance_type):
    fm_image = get_image_uri(sagemaker_session.boto_session.region_name,
                             "factorization-machines",
                             repo_version="1")

    estimator = Estimator(
        image_name=fm_image,
        role=EXECUTION_ROLE,
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
    )

    estimator.set_hyperparameters(num_factors=10,
                                  feature_dim=784,
                                  mini_batch_size=100,
                                  predictor_type="regressor")

    return estimator
Example #27
    def _to_estimator_conf(self, task):
        from sagemaker.amazon.amazon_estimator import get_image_uri

        return {
            "image_name": get_image_uri(task.region, task.estimator_config.algorithm),
            "role": task.sagemaker_role,
            "train_instance_count": task.estimator_config.train_instance_count,
            "train_instance_type": task.estimator_config.train_instance_type,
            "train_volume_size": task.estimator_config.train_volume_size,
            "output_path": str(task.output_path),
            "base_job_name": task.estimator_config.base_job_name,
            "hyperparameters": task.estimator_config.hyperparameters,
        }
    def _create_model(self):
        self._model_name = "Kmeans-model-{}".format(
            strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
        self._logger.info("Creating SageMaker KMeans model ... {}".format(
            self._model_name))

        primary_container = {
            'Image': get_image_uri(self._sagemaker_session.boto_region_name, 'kmeans'),
            'ModelDataUrl': self._model_s3_filepath
        }

        create_model_response = self._sagemaker_client.create_model(
            ModelName=self._model_name,
            ExecutionRoleArn=self._ml_engine.iam_role,
            PrimaryContainer=primary_container)
        model_arn = create_model_response['ModelArn']
        self._logger.info(
            "Model created successfully! name: {}, arn: {}".format(
                self._model_name, model_arn))
Example #29
def submit_training_job(path_to_train_data, bucket, formatted_data):
    output_prefix = 'train_output'
    role = 'arn:aws:iam::450246219423:role/service-role/AmazonSageMaker-ExecutionRole-20200426T181822'

    train_data_path = path_to_train_data

    # path_to_test_data = f's3://ml-web-app/test/test.protobuf'
    # job_name = 'iris-train'

    output_path = 's3://{}/{}/factorization_machine_output'.format(bucket, output_prefix)

    container = get_image_uri(boto3.Session(region_name='us-west-1').region_name, 'factorization-machines')

    estimator = sagemaker.estimator.Estimator(container, role, train_instance_count=1,
                                              train_instance_type='ml.c4.xlarge', output_path=output_path,
                                              sagemaker_session=sagemaker.Session())

    estimator.set_hyperparameters(feature_dim=formatted_data.shape[1], predictor_type='regressor', num_factors=64)

    # run training job

    estimator.fit({'train': train_data_path})
def train_model_deploy(args):

    backup_bucket = args.s3_backup_bucket
    sagemaker_bucket = args.s3_sagemaker_bucket
    role = args.role_arn
    sm_prefix = 'demo-breast-cancer-prediction'
    # Get Docker image for linear-learner
    container = get_image_uri(boto3.Session().region_name, 'linear-learner')

    # Find the latest item from the backup bucket
    objs = s3.list_objects(Bucket=backup_bucket)
    key_time = [(item['Key'], item['LastModified'])
                for item in objs['Contents']]
    key_time = sorted(key_time, key=lambda tup: tup[1], reverse=True)
    s3_file_key = key_time[0][0]

    print('Variables initialized as:')
    print(f'Backup Bucket {backup_bucket}')
    print(f'Backup File Key {s3_file_key}')
    print(f'Role ARN {role}')
    print(f'Sagemaker Bucket {sagemaker_bucket}')
    print(f'Sagemaker Prefix {sm_prefix}')
    print(f'Container {container}')

    try:
        data = load_backup_data(backup_bucket, s3_file_key)
        train_X, train_y, val_X, val_y, test_X, test_y = split_data(data)
        save_train_val_to_s3(sagemaker_bucket, sm_prefix, train_X, train_y,
                             val_X, val_y)
        linear_job = create_training_job(container, sagemaker_bucket,
                                         sm_prefix, role)
        model_name = linear_job
        create_model(container, role, linear_job, model_name)
        linear_endpoint = create_or_update_endpoint(model_name)
        test_endpoint(linear_endpoint, test_X, test_y, train_X, train_y)
        print('Success')
    except Exception as e:
        print(e)
        sys.exit()