Example #1
def test_create_model_from_estimator(sagemaker_session, xgboost_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        framework_version=xgboost_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
    )

    job_name = "new_name"
    xgboost.fit(inputs="s3://mybucket/train", job_name=job_name)
    model = xgboost.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == xgboost_version
    assert model.py_version == xgboost.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
Example #2
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )

    xgboost.fit(inputs="s3://mybucket/train", job_name="new_name")

    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    model = xgboost.create_model(
        role=new_role, model_server_workers=model_server_workers, vpc_config_override=vpc_config
    )

    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
Example #3
def test_attach_custom_image(sagemaker_session):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/my_custom_xgboost_image:latest"
    returned_job_description = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description
    )

    with pytest.raises(TypeError) as error:
        XGBoost.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert "expected string" in str(error)
Example #4
def test_training_with_network_isolation(
    sagemaker_session,
    xgboost_latest_version,
    xgboost_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        base_job_name = "test-network-isolation-xgboost"

        xgboost = XGBoost(
            entry_point=os.path.join(DATA_DIR, "xgboost_abalone",
                                     "abalone.py"),
            role=ROLE,
            instance_type=cpu_instance_type,
            instance_count=1,
            framework_version=xgboost_latest_version,
            py_version=xgboost_latest_py_version,
            base_job_name=base_job_name,
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )

        train_input = xgboost.sagemaker_session.upload_data(
            path=os.path.join(DATA_DIR, "xgboost_abalone", "abalone"),
            key_prefix="integ-test-data/xgboost_abalone/abalone",
        )
        job_name = unique_name_from_base(base_job_name)
        xgboost.fit(inputs={"train": train_input}, job_name=job_name)
        assert sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name)["EnableNetworkIsolation"]
Example #5
def test_attach_wrong_framework(sagemaker_session):
    rjd = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py3-cpu:1.0.4",
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "checkpoint_path": '"s3://other/1508872349"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_container_log_level": '"logging.INFO"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd
    )

    with pytest.raises(ValueError) as error:
        XGBoost.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert "didn't use image for requested framework" in str(error)
Example #6
def main():
    print('Starting model training.')
    print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')

    hyperparameters = {
        "max_depth": "5",
        "eta": "0.2",
        "gamma": "4",
        "min_child_weight": "6",
        "subsample": "0.7",
        "objective": "reg:squarederror",
        "num_round": "50",
        "verbosity": "2",
    }

    xgb_script_mode_estimator = XGBoost(
        entry_point="./code/abalone.py",
        hyperparameters=hyperparameters,
        role=DUMMY_IAM_ROLE,
        instance_count=1,
        instance_type='local',
        framework_version="1.2-1"
    )

    train_input = TrainingInput("file://./data/train/abalone", content_type="text/libsvm")

    xgb_script_mode_estimator.fit({"train": train_input, "validation": train_input})

    print('Completed model training')

    model_data = xgb_script_mode_estimator.model_data
    print(model_data)

    xgb_inference_model = XGBoostModel(
        model_data=model_data,
        role=DUMMY_IAM_ROLE,
        entry_point="./code/inference.py",
        framework_version="1.2-1",
    )

    print('Deploying endpoint in local mode')
    predictor = xgb_inference_model.deploy(
        initial_instance_count=1,
        instance_type="local",
    )

    a_young_abalone = "6 1:3 2:0.37 3:0.29 4:0.095 5:0.249 6:0.1045 7:0.058 8:0.067"
    do_inference_on_local_endpoint(predictor, a_young_abalone)

    an_old_abalone = "15 1:1 2:0.655 3:0.53 4:0.175 5:1.2635 6:0.486 7:0.2635 8:0.415"
    do_inference_on_local_endpoint(predictor, an_old_abalone)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint(predictor.endpoint_name)
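
The main() above calls a helper, do_inference_on_local_endpoint, that is not included in this snippet. A minimal sketch of such a helper, assuming the deployed XGBoostPredictor's default serializer accepts the libsvm-formatted string payload:

def do_inference_on_local_endpoint(predictor, payload):
    # Hypothetical sketch, not the original helper: send a raw libsvm-formatted
    # string to the (local) endpoint and print the model's response.
    print('Invoking the endpoint with payload: {}'.format(payload))
    prediction = predictor.predict(payload)
    print('Prediction: {}'.format(prediction))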
Example #7
def test_training_image_uri(sagemaker_session, xgboost_framework_version):
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=xgboost_framework_version,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        instance_count=1,
        py_version=PYTHON_VERSION,
    )

    assert _get_full_image_uri(xgboost_framework_version) in xgboost.training_image_uri()
Example #8
def test_train_image_default(sagemaker_session):
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        py_version=PYTHON_VERSION,
    )

    assert _get_full_cpu_image_uri(XGBOOST_LATEST_VERSION) in xgboost.train_image()
Example #9
def test_py2_xgboost_error(sagemaker_session, xgboost_framework_version):
    with pytest.raises(ValueError) as error1:
        XGBoost(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            framework_version=xgboost_framework_version,
            sagemaker_session=sagemaker_session,
            instance_type=INSTANCE_TYPE,
            instance_count=1,
            py_version="py2",
        )

    with pytest.raises(ValueError) as error2:
        model = XGBoostModel(
            model_data=DATA_DIR,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            entry_point=SCRIPT_PATH,
            framework_version=xgboost_framework_version,
            py_version="py2",
        )
        model.serving_image_uri(REGION, INSTANCE_TYPE)

    error_message = "Unsupported Python version: py2."
    assert error_message in str(error1)
    assert error_message in str(error2)
Example #10
def test_py2_xgboost_attribute_error(sagemaker_session):
    with pytest.raises(AttributeError) as error1:
        XGBoost(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            framework_version=XGBOOST_LATEST_VERSION,
            sagemaker_session=sagemaker_session,
            train_instance_type=INSTANCE_TYPE,
            train_instance_count=1,
            py_version="py2",
        )

    with pytest.raises(AttributeError) as error2:
        XGBoostModel(
            model_data=DATA_DIR,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            entry_point=SCRIPT_PATH,
            framework_version=XGBOOST_LATEST_VERSION,
            py_version="py2",
        )

    error_message = "XGBoost container does not support Python 2, please use Python 3"
    assert error_message in str(error1)
    assert error_message in str(error2)
Example #11
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )

    xgboost.fit(inputs="s3://mybucket/train", job_name="new_name")

    custom_image = "ubuntu:latest"
    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    new_source_dir = "s3://myotherbucket/source"
    dependencies = ["/directory/a", "/directory/b"]
    model_name = "model-name"
    model = xgboost.create_model(
        image=custom_image,
        role=new_role,
        model_server_workers=model_server_workers,
        vpc_config_override=vpc_config,
        entry_point=SERVING_SCRIPT_FILE,
        source_dir=new_source_dir,
        dependencies=dependencies,
        name=model_name,
    )

    assert model.image == custom_image
    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
    assert model.entry_point == SERVING_SCRIPT_FILE
    assert model.source_dir == new_source_dir
    assert model.dependencies == dependencies
    assert model.name == model_name
Example #12
def test_attach(sagemaker_session, xgboost_version):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:{}-cpu-{}".format(
        xgboost_version, PYTHON_VERSION)
    returned_job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training":
            '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {
            "MaxRuntimeInSeconds": 24 * 60 * 60
        },
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {
            "KmsKeyId": "",
            "S3OutputPath": "s3://place/output/neo"
        },
        "TrainingJobOutput": {
            "S3TrainingJobOutput": "s3://here/output.tar.gz"
        },
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description)

    estimator = XGBoost.attach(training_job_name="neo",
                               sagemaker_session=sagemaker_session)
    assert estimator._current_job_name == "neo"
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == PYTHON_VERSION
    assert estimator.framework_version == xgboost_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
Example #13
def test_train_image(sagemaker_session, xgboost_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        framework_version=xgboost_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
    )

    train_image = xgboost.train_image()
    assert (
        train_image ==
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3"
    )
Example #14
def test_create_model_with_custom_image(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    custom_image = "ubuntu:latest"
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        image_name=custom_image,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
    )

    xgboost.fit(inputs="s3://mybucket/train", job_name="new_name")
    model = xgboost.create_model()

    assert model.image == custom_image
Example #15
def test_unsupported_xgboost_version_error(sagemaker_session):
    with pytest.raises(ValueError) as error1:
        XGBoost(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            framework_version="1.1",
            sagemaker_session=sagemaker_session,
            instance_type=INSTANCE_TYPE,
            instance_count=1,
        )

    with pytest.raises(ValueError) as error2:
        XGBoost(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            framework_version="1.1-1",
            sagemaker_session=sagemaker_session,
            instance_type=INSTANCE_TYPE,
            instance_count=1,
        )

    error_message = "XGBoost 1.1 is not supported"
    assert error_message in str(error1)
    assert error_message in str(error2)
Example #16
def test_distributed_training(strftime, sagemaker_session,
                              xgboost_framework_version):
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=DIST_INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=xgboost_framework_version,
    )

    inputs = "s3://mybucket/train"

    xgboost.fit(inputs=inputs)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(xgboost_framework_version,
                                            DIST_INSTANCE_COUNT)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] = inputs

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = xgboost.create_model()

    expected_image_base = "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:{}-cpu-{}"
    assert {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-xgboost-{}/source/sourcedir.tar.gz".format(TIMESTAMP),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": expected_image_base.format(xgboost_framework_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    } == model.prepare_container_def(CPU)

    assert "cpu" in model.prepare_container_def(CPU)["Image"]
    predictor = xgboost.deploy(1, CPU)
    assert isinstance(predictor, XGBoostPredictor)
Example #17
def _xgboost_estimator(sagemaker_session,
                       framework_version=XGBOOST_LATEST_VERSION,
                       train_instance_type=None,
                       train_instance_count=1,
                       base_job_name=None,
                       **kwargs):

    return XGBoost(entry_point=SCRIPT_PATH,
                   framework_version=framework_version,
                   role=ROLE,
                   sagemaker_session=sagemaker_session,
                   train_instance_type=train_instance_type
                   if train_instance_type else INSTANCE_TYPE,
                   train_instance_count=train_instance_count,
                   base_job_name=base_job_name,
                   py_version=PYTHON_VERSION,
                   **kwargs)
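
A usage sketch for the factory above (illustrative only; the instance type below is an assumed value, and omitted arguments fall back to the module's test constants):

estimator = _xgboost_estimator(sagemaker_session, train_instance_type="ml.c4.xlarge")
estimator.fit("s3://mybucket/train")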
Example #18
def test_xgboost_gpu(time, strftime, sagemaker_session,
                     xgboost_gpu_framework_version):
    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=GPU_INSTANCE_TYPE,
        instance_count=1,
        framework_version=xgboost_gpu_framework_version,
    )

    inputs = "s3://mybucket/train"

    xgboost.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(xgboost_gpu_framework_version,
                                            instance_type=GPU_INSTANCE_TYPE)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = xgboost.create_model()

    assert {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-xgboost-{}/source/sourcedir.tar.gz".format(TIMESTAMP),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": _get_full_image_uri(xgboost_gpu_framework_version),
        "ModelDataUrl": "s3://m/m.tar.gz",
    } == model.prepare_container_def(GPU_INSTANCE_TYPE)

    predictor = xgboost.deploy(1, GPU_INSTANCE_TYPE)
    assert isinstance(predictor, XGBoostPredictor)
Example #19
def test_xgboost_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        xgboost = XGBoost(
            entry_point=os.path.join(DATA_DIR, "dummy_script.py"),
            framework_version=XGBOOST_LATEST_VERSION,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_type=cpu_instance_type,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            base_job_name="XGBoost job",
            py_version=PYTHON_VERSION,
        )

        training_config = _build_airflow_workflow(
            estimator=xgboost, instance_type=cpu_instance_type
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
        )
Example #20
def get_pipeline(
        region,
        sagemaker_project_arn=None,
        role=None,
        default_bucket='',
        pipeline_name='end-to-end-ml-sagemaker-pipeline',
        model_package_group_name='end-to-end-ml-sm-model-package-group',
        base_job_prefix='endtoendmlsm') -> Pipeline:
    """
    Gets the SM Pipeline.

    :param role: The execution role.
    :param bucket_name: The bucket where pipeline artifacts are stored.
    :param prefix: The prefix where pipeline artifacts are stored.
    :return: A Pipeline instance.
    """

    bucket_name = default_bucket
    prefix = 'endtoendmlsm'
    sagemaker_session = get_session(region, bucket_name)

    # ---------------------
    # Processing parameters
    # ---------------------
    # The path to the raw data.
    raw_data_path = 's3://gianpo-public/endtoendml/data/raw/predmain_raw_data_header.csv'
    raw_data_path_param = ParameterString(name="raw_data_path",
                                          default_value=raw_data_path)
    # The output path to the training data.
    train_data_path = 's3://{0}/{1}/data/preprocessed/train/'.format(
        bucket_name, prefix)
    train_data_path_param = ParameterString(name="train_data_path",
                                            default_value=train_data_path)
    # The output path to the validation data.
    val_data_path = 's3://{0}/{1}/data/preprocessed/val/'.format(
        bucket_name, prefix)
    val_data_path_param = ParameterString(name="val_data_path",
                                          default_value=val_data_path)
    # The output path to the featurizer model.
    model_path = 's3://{0}/{1}/output/sklearn/'.format(bucket_name, prefix)
    model_path_param = ParameterString(name="model_path",
                                       default_value=model_path)
    # The instance type for the processing job.
    processing_instance_type_param = ParameterString(
        name="processing_instance_type", default_value='ml.m5.large')
    # The instance count for the processing job.
    processing_instance_count_param = ParameterInteger(
        name="processing_instance_count", default_value=1)
    # The train/test split ratio parameter.
    train_test_split_ratio_param = ParameterString(
        name="train_test_split_ratio", default_value='0.2')
    # -------------------
    # Training parameters
    # -------------------
    # XGB hyperparameters.
    max_depth_param = ParameterString(name="max_depth", default_value='3')
    eta_param = ParameterString(name="eta", default_value='0.1')
    gamma_param = ParameterString(name="gamma", default_value='0')
    min_child_weight_param = ParameterString(name="min_child_weight",
                                             default_value='1')
    objective_param = ParameterString(name="objective",
                                      default_value='binary:logistic')
    num_round_param = ParameterString(name="num_round", default_value='10')
    eval_metric_param = ParameterString(name="eval_metric",
                                        default_value='auc')
    # The instance type for the training job.
    training_instance_type_param = ParameterString(
        name="training_instance_type", default_value='ml.m5.xlarge')
    # The instance count for the training job.
    training_instance_count_param = ParameterInteger(
        name="training_instance_count", default_value=1)
    # The training output path for the model.
    output_path = 's3://{0}/{1}/output/'.format(bucket_name, prefix)
    output_path_param = ParameterString(name="output_path",
                                        default_value=output_path)
    # --------------------------
    # Register model parameters
    # --------------------------
    # The default instance type for deployment.
    deploy_instance_type_param = ParameterString(name="deploy_instance_type",
                                                 default_value='ml.m5.2xlarge')
    # The approval status for models added to the registry.
    model_approval_status_param = ParameterString(
        name="model_approval_status", default_value='PendingManualApproval')
    # --------------------------
    # Processing Step
    # --------------------------
    sklearn_processor = SKLearnProcessor(
        role=role,
        instance_type=processing_instance_type_param,
        instance_count=processing_instance_count_param,
        framework_version='0.20.0')
    inputs = [
        ProcessingInput(input_name='raw_data',
                        source=raw_data_path_param,
                        destination='/opt/ml/processing/input')
    ]
    outputs = [
        ProcessingOutput(output_name='train_data',
                         source='/opt/ml/processing/train',
                         destination=train_data_path_param),
        ProcessingOutput(output_name='val_data',
                         source='/opt/ml/processing/val',
                         destination=val_data_path_param),
        ProcessingOutput(output_name='model',
                         source='/opt/ml/processing/model',
                         destination=model_path_param)
    ]
    code_path = os.path.join(BASE_DIR, 'dataprep/preprocess.py')
    processing_step = ProcessingStep(name='Processing',
                                     code=code_path,
                                     processor=sklearn_processor,
                                     inputs=inputs,
                                     outputs=outputs,
                                     job_arguments=[
                                         '--train-test-split-ratio',
                                         train_test_split_ratio_param
                                     ])
    # --------------------------
    # Training Step
    # --------------------------
    hyperparameters = {
        "max_depth": max_depth_param,
        "eta": eta_param,
        "gamma": gamma_param,
        "min_child_weight": min_child_weight_param,
        "silent": 0,
        "objective": objective_param,
        "num_round": num_round_param,
        "eval_metric": eval_metric_param
    }
    entry_point = 'train.py'
    source_dir = os.path.join(BASE_DIR, 'train/')
    code_location = 's3://{0}/{1}/code'.format(bucket_name, prefix)
    estimator = XGBoost(entry_point=entry_point,
                        source_dir=source_dir,
                        output_path=output_path_param,
                        code_location=code_location,
                        hyperparameters=hyperparameters,
                        instance_type=training_instance_type_param,
                        instance_count=training_instance_count_param,
                        framework_version="0.90-2",
                        py_version="py3",
                        role=role)
    training_step = TrainingStep(
        name='Training',
        estimator=estimator,
        inputs={
            'train':
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs['train_data'].S3Output.S3Uri,
                content_type='text/csv'),
            'validation':
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs['val_data'].S3Output.S3Uri,
                content_type='text/csv')
        })
    # --------------------------
    # Register Model Step
    # --------------------------
    code_location = 's3://{0}/{1}/code'.format(bucket_name, prefix)
    sklearn_model = SKLearnModel(
        name='end-to-end-ml-sm-skl-model-{0}'.format(str(int(time.time()))),
        model_data=processing_step.properties.ProcessingOutputConfig.
        Outputs['model'].S3Output.S3Uri,
        entry_point='inference.py',
        source_dir=os.path.join(BASE_DIR, 'deploy/sklearn/'),
        code_location=code_location,
        role=role,
        sagemaker_session=sagemaker_session,
        framework_version='0.20.0',
        py_version='py3')
    code_location = 's3://{0}/{1}/code'.format(bucket_name, prefix)
    xgboost_model = XGBoostModel(
        name='end-to-end-ml-sm-xgb-model-{0}'.format(str(int(time.time()))),
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        entry_point='inference.py',
        source_dir=os.path.join(BASE_DIR, 'deploy/xgboost/'),
        code_location=code_location,
        framework_version='0.90-2',
        py_version='py3',
        role=role,
        sagemaker_session=sagemaker_session)
    pipeline_model_name = 'end-to-end-ml-sm-xgb-skl-pipeline-{0}'.format(
        str(int(time.time())))
    pipeline_model = PipelineModel(name=pipeline_model_name,
                                   role=role,
                                   models=[sklearn_model, xgboost_model],
                                   sagemaker_session=sagemaker_session)

    register_model_step = RegisterModel(
        name='RegisterModel',
        content_types=['text/csv'],
        response_types=['application/json', 'text/csv'],
        inference_instances=[deploy_instance_type_param, 'ml.m5.large'],
        transform_instances=['ml.c5.4xlarge'],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status_param,
        model=pipeline_model)
    # --------------------------
    # Pipeline
    # --------------------------

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            raw_data_path_param, train_data_path_param, val_data_path_param,
            model_path_param, processing_instance_type_param,
            processing_instance_count_param, train_test_split_ratio_param,
            max_depth_param, eta_param, gamma_param, min_child_weight_param,
            objective_param, num_round_param, eval_metric_param,
            training_instance_type_param, training_instance_count_param,
            output_path_param, deploy_instance_type_param,
            model_approval_status_param
        ],
        steps=[processing_step, training_step, register_model_step],
        sagemaker_session=sagemaker_session,
    )
    response = pipeline.upsert(role_arn=role)
    print(response["PipelineArn"])
    return pipeline
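
A minimal invocation sketch for get_pipeline (the region, role ARN, and bucket below are placeholder values, not part of the original snippet):

pipeline = get_pipeline(
    region="us-west-2",
    role="arn:aws:iam::111122223333:role/SageMakerRole",
    default_bucket="my-artifact-bucket",
)
# Note: get_pipeline() calls pipeline.upsert() internally, so this single call
# both defines and creates/updates the pipeline in the target account.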
Example #21
def test_sklearn_xgboost_sip_model_registration(sagemaker_session, role,
                                                pipeline_name, region_name):
    prefix = "sip"
    bucket_name = sagemaker_session.default_bucket()
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    sklearn_processor = SKLearnProcessor(
        role=role,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.20.0",
        sagemaker_session=sagemaker_session,
    )

    # The path to the raw data.
    raw_data_path = "s3://{0}/{1}/data/raw/".format(bucket_name, prefix)
    raw_data_path_param = ParameterString(name="raw_data_path",
                                          default_value=raw_data_path)

    # The output path to the training data.
    train_data_path = "s3://{0}/{1}/data/preprocessed/train/".format(
        bucket_name, prefix)
    train_data_path_param = ParameterString(name="train_data_path",
                                            default_value=train_data_path)

    # The output path to the validation data.
    val_data_path = "s3://{0}/{1}/data/preprocessed/val/".format(
        bucket_name, prefix)
    val_data_path_param = ParameterString(name="val_data_path",
                                          default_value=val_data_path)

    # The training output path for the model.
    output_path = "s3://{0}/{1}/output/".format(bucket_name, prefix)
    output_path_param = ParameterString(name="output_path",
                                        default_value=output_path)

    # The output path to the featurizer model.
    model_path = "s3://{0}/{1}/output/sklearn/".format(bucket_name, prefix)
    model_path_param = ParameterString(name="model_path",
                                       default_value=model_path)

    inputs = [
        ProcessingInput(
            input_name="raw_data",
            source=raw_data_path_param,
            destination="/opt/ml/processing/input",
        )
    ]

    outputs = [
        ProcessingOutput(
            output_name="train_data",
            source="/opt/ml/processing/train",
            destination=train_data_path_param,
        ),
        ProcessingOutput(
            output_name="val_data",
            source="/opt/ml/processing/val",
            destination=val_data_path_param,
        ),
        ProcessingOutput(
            output_name="model",
            source="/opt/ml/processing/model",
            destination=model_path_param,
        ),
    ]

    base_dir = os.path.join(DATA_DIR, "sip")
    code_path = os.path.join(base_dir, "preprocessor.py")

    processing_step = ProcessingStep(
        name="Processing",
        code=code_path,
        processor=sklearn_processor,
        inputs=inputs,
        outputs=outputs,
        job_arguments=["--train-test-split-ratio", "0.2"],
    )

    entry_point = "training.py"
    source_dir = base_dir
    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)

    estimator = XGBoost(
        entry_point=entry_point,
        source_dir=source_dir,
        output_path=output_path_param,
        code_location=code_location,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )

    training_step = TrainingStep(
        name="Training",
        estimator=estimator,
        inputs={
            "train":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["train_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["val_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
    source_dir = os.path.join(base_dir, "sklearn_source_dir")

    sklearn_model = SKLearnModel(
        name="sklearn-model",
        model_data=processing_step.properties.ProcessingOutputConfig.
        Outputs["model"].S3Output.S3Uri,
        entry_point="inference.py",
        source_dir=source_dir,
        code_location=code_location,
        role=role,
        sagemaker_session=sagemaker_session,
        framework_version="0.20.0",
        py_version="py3",
    )

    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
    source_dir = os.path.join(base_dir, "xgboost_source_dir")

    xgboost_model = XGBoostModel(
        name="xgboost-model",
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        entry_point="inference.py",
        source_dir=source_dir,
        code_location=code_location,
        framework_version="0.90-2",
        py_version="py3",
        role=role,
        sagemaker_session=sagemaker_session,
    )

    pipeline_model = PipelineModel([xgboost_model, sklearn_model],
                                   role,
                                   sagemaker_session=sagemaker_session)

    step_register = RegisterModel(
        name="AbaloneRegisterModel",
        model=pipeline_model,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="windturbine",
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            raw_data_path_param,
            train_data_path_param,
            val_data_path_param,
            model_path_param,
            instance_type,
            instance_count,
            output_path_param,
        ],
        steps=[processing_step, training_step, step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.upsert(role_arn=role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )

        execution = pipeline.start()
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #22
def test_model_registration_with_drift_check_baselines(
    sagemaker_session,
    role,
    pipeline_name,
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    # upload model data to s3
    model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
    model_base_uri = "s3://{}/{}/input/model/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("model"),
    )
    model_uri = S3Uploader.upload(model_local_path,
                                  model_base_uri,
                                  sagemaker_session=sagemaker_session)
    model_uri_param = ParameterString(name="model_uri",
                                      default_value=model_uri)

    # upload metrics to s3
    metrics_data = (
        '{"regression_metrics": {"mse": {"value": 4.925353410353891, '
        '"standard_deviation": 2.219186917819692}}}')
    metrics_base_uri = "s3://{}/{}/input/metrics/{}".format(
        sagemaker_session.default_bucket(),
        "register_model_test_with_drift_baseline",
        utils.unique_name_from_base("metrics"),
    )
    metrics_uri = S3Uploader.upload_string_as_file_body(
        body=metrics_data,
        desired_s3_uri=metrics_base_uri,
        sagemaker_session=sagemaker_session,
    )
    metrics_uri_param = ParameterString(name="metrics_uri",
                                        default_value=metrics_uri)

    model_metrics = ModelMetrics(
        bias=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    drift_check_baselines = DriftCheckBaselines(
        model_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_statistics=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        model_data_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_pre_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        bias_post_training_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_constraints=MetricsSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
        explainability_config_file=FileSource(
            s3_uri=metrics_uri_param,
            content_type="application/json",
        ),
    )
    customer_metadata_properties = {"key1": "value1"}
    estimator = XGBoost(
        entry_point="training.py",
        source_dir=os.path.join(DATA_DIR, "sip"),
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )
    step_register = RegisterModel(
        name="MyRegisterModelStep",
        estimator=estimator,
        model_data=model_uri_param,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="testModelPackageGroup",
        model_metrics=model_metrics,
        drift_check_baselines=drift_check_baselines,
        customer_metadata_properties=customer_metadata_properties,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            model_uri_param,
            metrics_uri_param,
            instance_type,
            instance_count,
        ],
        steps=[step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={
                "model_uri": model_uri,
                "metrics_uri": metrics_uri
            })
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            if failure_reason != "":
                logging.error(
                    f"Pipeline execution failed with error: {failure_reason}."
                    " Retrying..")
                continue
            assert execution_steps[0]["StepStatus"] == "Succeeded"
            assert execution_steps[0]["StepName"] == "MyRegisterModelStep"

            response = sagemaker_session.sagemaker_client.describe_model_package(
                ModelPackageName=execution_steps[0]["Metadata"]["RegisterModel"]["Arn"]
            )

            assert (response["ModelMetrics"]["Explainability"]["Report"]
                    ["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["Bias"][
                "PreTrainingConstraints"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["Explainability"]
                    ["Constraints"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["ModelQuality"]
                    ["Statistics"]["ContentType"] == "application/json")
            assert (response["DriftCheckBaselines"]["ModelDataQuality"]
                    ["Statistics"]["ContentType"] == "application/json")
            assert response[
                "CustomerMetadataProperties"] == customer_metadata_properties
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #23
hyperparameters = {
    "max_depth": "10",
    "eta": "0.2",
    "gamma": "1",
    "min_child_weight": "6",
    "silent": "0",
    "objective": "multi:softmax",
    "num_class": "15",
    "num_round": "1"  # TEMP: Hack to make faster
}

xgb = XGBoost(entry_point=entry_point,
              source_dir=source_dir,
              output_path=model_output_path,
              code_location=model_code_location,
              hyperparameters=hyperparameters,
              train_instance_type="ml.m5.4xlarge",
              train_instance_count=1,
              framework_version="0.90-2",
              py_version="py3",
              role=sagemaker_execution_role,
              debugger_hook_config=debug_hook_config,
              rules=debug_rules)

# Upload model code to s3

xgb.prepare_workflow_for_training(job_name)
print('uploaded code to: {}'.format(xgb.uploaded_code.s3_prefix))

# Create Workflow steps

execution_input = ExecutionInput(schema={
    'TrainLocation': str,