Example #1
def test_fail_distributed_training(sagemaker_session, sklearn_version):
    with pytest.raises(AttributeError) as error:
        SKLearn(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_count=DIST_INSTANCE_COUNT,
            train_instance_type=INSTANCE_TYPE,
            py_version=PYTHON_VERSION,
            framework_version=sklearn_version,
        )
    assert "Scikit-Learn does not support distributed training." in str(error)
Example #2
def test_training_with_additional_hyperparameters(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'sklearn_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'sklearn_mnist')

        sklearn = SKLearn(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_type="ml.c4.xlarge",
                          framework_version=sklearn_full_version,
                          py_version=PYTHON_VERSION,
                          sagemaker_session=sagemaker_session,
                          hyperparameters={'epochs': 1})

        train_input = sklearn.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                            key_prefix='integ-test-data/sklearn_mnist/train')
        test_input = sklearn.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                           key_prefix='integ-test-data/sklearn_mnist/test')
        job_name = unique_name_from_base('test-sklearn-hp')

        sklearn.fit({'train': train_input, 'test': test_input}, job_name=job_name)
        return sklearn.latest_training_job.name
Example #3
def test_attach_deploy(sklearn_training_job, sagemaker_session,
                       cpu_instance_type):
    endpoint_name = "test-sklearn-attach-deploy-{}".format(
        sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = SKLearn.attach(sklearn_training_job,
                                   sagemaker_session=sagemaker_session)
        predictor = estimator.deploy(1,
                                     cpu_instance_type,
                                     endpoint_name=endpoint_name)
        _predict_and_assert(predictor)
Example #4
def main(args):
    print("args.local=", args.local)
    # Initialise SDK
    sklearn_estimator = SKLearn(
        entry_point='src/train_and_deploy.py',
        role=CLOUD_CONFIG['sagemaker_role_id']['value'],
        train_instance_type='local' if args.local else 'ml.m4.xlarge',
        hyperparameters={
            'sagemaker_submit_directory':
            f"s3://{CLOUD_CONFIG['s3bucket']['value']}",
        },
        framework_version='0.23-1',
        metric_definitions=[{
            'Name': 'train:score',
            'Regex': r'train:score=(\S+)'
        }],
    )
    # Run model training job
    sklearn_estimator.fit({
        'train':
        "file://./data/data.csv"
        if args.local else f"s3://{CLOUD_CONFIG['s3bucket']['value']}/data.csv"
    })

    # Deploy trained model to an endpoint
    sklearn_estimator.deploy(
        instance_type='local' if args.local else 'ml.t2.medium',
        initial_instance_count=1,
        endpoint_name='demo-endpoint',
    )
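A minimal sketch of the command-line wiring that main(args) above assumes; the --local flag name is inferred from args.local and is an assumption, not part of the original example:

# Hypothetical CLI entry point for main(args).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--local', action='store_true',
                        help='run training and deployment in SageMaker local mode')
    main(parser.parse_args())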
Example #5
def test_failed_training_job(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "failure_script.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_full_version,
            py_version=PYTHON_VERSION,
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"), key_prefix="integ-test-data/sklearn_mnist/train"
        )
        job_name = unique_name_from_base("test-sklearn-failed")

        with pytest.raises(ValueError):
            sklearn.fit(train_input, job_name=job_name)
Example #6
def _sklearn_estimator(
    sagemaker_session, framework_version, instance_type=None, base_job_name=None, **kwargs
):
    return SKLearn(
        entry_point=SCRIPT_PATH,
        framework_version=framework_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=instance_type if instance_type else INSTANCE_TYPE,
        base_job_name=base_job_name,
        py_version=PYTHON_VERSION,
        **kwargs
    )
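A usage sketch for the helper above; the fixture values and the extra keyword argument are illustrative only:

# Illustrative call: extra kwargs such as enable_network_isolation are
# forwarded straight to the SKLearn constructor.
estimator = _sklearn_estimator(
    sagemaker_session,
    framework_version="0.23-1",
    base_job_name="sklearn-test",
    enable_network_isolation=True,
)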
Example #7
def test_estimator_py2_warning(warning, sagemaker_session):
    estimator = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        py_version="py2",
    )

    assert estimator.py_version == "py2"
    warning.assert_called_with(estimator.__framework_name__,
                               defaults.LATEST_PY2_VERSION)
Example #8
def test_estimator_throws_error_for_unsupported_version(
        error, sagemaker_session):
    with pytest.raises(ValueError):
        SKLearn(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_count=INSTANCE_COUNT,
            train_instance_type=INSTANCE_TYPE,
            framework_version="foo",
        )

    # The constructor raises, so these checks must live outside the
    # pytest.raises block; otherwise they would never run.
    assert "foo" not in defaults.SKLEARN_SUPPORTED_VERSIONS
    error.assert_called_with(defaults.SKLEARN_NAME, "foo",
                             defaults.SKLEARN_SUPPORTED_VERSIONS)
Example #9
def test_create_model_from_estimator(name_from_base, sagemaker_session,
                                     sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    base_job_name = "job"

    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        framework_version=sklearn_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name=base_job_name,
        source_dir=source_dir,
        enable_network_isolation=True,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    model_name = "model_name"
    name_from_base.return_value = model_name
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == model_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
    assert model.enable_network_isolation()

    name_from_base.assert_called_with(base_job_name)
Example #10
def test_training_with_network_isolation(
    sagemaker_session,
    sklearn_latest_version,
    sklearn_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
            enable_network_isolation=True,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")
        job_name = unique_name_from_base("test-sklearn-hp")

        sklearn.fit({"train": train_input, "test": test_input}, job_name=job_name)
        assert sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name)["EnableNetworkIsolation"]
Example #11
def test_async_fit(sagemaker_session):
    endpoint_name = 'test-sklearn-attach-deploy-{}'.format(sagemaker_timestamp())

    with timeout(minutes=5):
        training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge",
                                                    sklearn_version=SKLEARN_VERSION,
                                                    py_version=PYTHON_VERSION, wait=False)

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % training_job_name)
        estimator = SKLearn.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        predictor = estimator.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        _predict_and_assert(predictor)
Example #12
def _run_mnist_training_job(sagemaker_session,
                            instance_type,
                            sklearn_version,
                            py_version,
                            wait=True):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_version,
            py_version=py_version,
            instance_type=instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")
        job_name = unique_name_from_base("test-sklearn-mnist")

        sklearn.fit({"train": train_input, "test": test_input}, wait=wait, job_name=job_name)
        return sklearn.latest_training_job.name
Example #13
def test_attach(sagemaker_session, sklearn_version):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}".format(
        sklearn_version, PYTHON_VERSION
    )
    returned_job_description = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description
    )

    estimator = SKLearn.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert estimator._current_job_name == "neo"
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == PYTHON_VERSION
    assert estimator.framework_version == sklearn_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
Example #14
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"

    sklearn.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}"
    )
    assert {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": (
                "s3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz".format(TIMESTAMP)
            ),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": expected_image_base.format(sklearn_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    } == model.prepare_container_def(CPU)

    assert "cpu" in model.prepare_container_def(CPU)["Image"]
    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
Example #15
def test_attach_custom_image(sagemaker_session):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/my_custom_sklearn_image:latest"
    returned_job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training":
            '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {
            "MaxRuntimeInSeconds": 24 * 60 * 60
        },
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {
            "KmsKeyId": "",
            "S3OutputPath": "s3://place/output/neo"
        },
        "TrainingJobOutput": {
            "S3TrainingJobOutput": "s3://here/output.tar.gz"
        },
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description)

    estimator = SKLearn.attach(training_job_name="neo",
                               sagemaker_session=sagemaker_session)
    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
Example #16
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      py_version=PYTHON_VERSION,
                      framework_version=sklearn_version)

    inputs = 's3://mybucket/train'

    sklearn.fit(inputs=inputs)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ['train', 'logs_for_job']
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ['resource']

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args['input_config'][0]['DataSource']['S3DataSource'][
        'S3Uri'] = inputs

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}'
    assert {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY': (
                's3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz'.format(TIMESTAMP)
            ),
            'SAGEMAKER_PROGRAM': 'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20'
        },
        'Image': expected_image_base.format(sklearn_version, PYTHON_VERSION),
        'ModelDataUrl': 's3://m/m.tar.gz'
    } == model.prepare_container_def(CPU)

    assert 'cpu' in model.prepare_container_def(CPU)['Image']
    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
Example #17
def test_sklearn_airflow_config_uploads_data_source_to_s3(
    sagemaker_session,
    cpu_instance_type,
    sklearn_latest_version,
    sklearn_latest_py_version,
):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role=ROLE,
            instance_type=cpu_instance_type,
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")

        training_config = _build_airflow_workflow(
            estimator=sklearn,
            instance_type=cpu_instance_type,
            inputs={"train": train_input, "test": test_input},
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
        )
Example #18
inters_df.consultant.portfolio = sub_port(
    consultant_processing(list(inters_df.consultant)).portfolio)

inters_df.consultant = cons_predictor(
    consultant_processing(list(inters_df.consultant)))

inters_df = pd.concat(
    [
        inters_df.drop(["client", "duration", "ongoing", "n_transactions"], axis=1),
        client_processing(list(inters_df.client)),
    ],
    axis=1,
)

inters_df.to_csv(key + "interactions.csv")

upload_file(key + "interactions.csv")

models = {}

for name, df in inters_df.groupby("consultant"):

    model = SKLearn(entry_point="training_scripts.py",
                    train_instance_type="ml.c4.xlarge",
                    role=role,
                    sagemaker_session=sagemaker_session,
                    hyperparameters={"normalize": True})

    # SKLearn.fit() returns None, so deploy from the estimator itself.
    model.fit({"train": df})
    models[name] = model.deploy(initial_instance_count=1,
                                instance_type="ml.m4.xlarge")
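Each loop iteration above leaves a live endpoint per consultant. A cleanup sketch, assuming the predictors are no longer needed:

# Tear down every per-consultant endpoint to stop incurring charges.
for name, predictor in models.items():
    predictor.delete_endpoint()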
Example #19
def get_pipeline(
    region,
    sagemaker_session,
    role=None,
    default_bucket=None,
    model_package_group_name="sts-sklearn-grp",
    pipeline_name="stsPipeline",
    base_job_prefix="sts",
) -> Pipeline:
    """Gets a SageMaker ML Pipeline instance that works on STS data.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_session: session to use for SageMaker interactions.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts.
        model_package_group_name: model package group for registered models.
        pipeline_name: name of the pipeline instance.
        base_job_prefix: prefix for the names of jobs the pipeline starts.

    Returns:
        an instance of a pipeline
    """
    """
        Instance types allowed:
        
        ml.r5.12xlarge, ml.m5.4xlarge, ml.p2.xlarge, ml.m4.16xlarge, ml.r5.24xlarge, 
        ml.t3.xlarge, ml.r5.16xlarge, ml.m5.large, ml.p3.16xlarge, ml.p2.16xlarge, 
        ml.c4.2xlarge, ml.c5.2xlarge, ml.c4.4xlarge, ml.c5.4xlarge, ml.c4.8xlarge, 
        ml.c5.9xlarge, ml.c5.xlarge, ml.c4.xlarge, ml.t3.2xlarge, ml.t3.medium, 
        ml.c5.18xlarge, ml.r5.2xlarge, ml.p3.2xlarge, ml.m5.xlarge, ml.m4.10xlarge, 
        ml.r5.4xlarge, ml.m5.12xlarge, ml.m4.xlarge, ml.t3.large, ml.m5.24xlarge, 
        ml.m4.2xlarge, ml.m5.2xlarge, ml.p2.8xlarge, ml.r5.8xlarge, ml.r5.xlarge, 
        ml.r5.large, ml.p3.8xlarge, ml.m4.4xlarge

        see
        https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(name="ProcessingInstanceType",
                                               default_value="ml.m5.xlarge")

    # the free tier includes 50 hours of m4.xlarge or m5.xlarge instances
    training_instance_type = ParameterString(name="TrainingInstanceType",
                                             default_value="ml.m5.xlarge")
    model_approval_status = ParameterString(name="ModelApprovalStatus",
                                            default_value="Approved")

    # preprocess

    # preprocess input data
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://sts-datwit-dataset/stsmsrpc.txt",
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-sts-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_preprocess = ProcessingStep(
        name="PreprocessSTSData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train",
                             source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation",
                             source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test",
                             source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/stsTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="sklearn",
        region=region,
        version="0.23-1",
        py_version="py3",
        instance_type=training_instance_type,
    )

    sklearn_estimator = SKLearn(
        entry_point='training.py',
        source_dir=BASE_DIR,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        framework_version="0.23-1",
        py_version="py3",
        base_job_name=f"{base_job_prefix}/sts-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_train = TrainingStep(
        name="TrainSTSModel",
        estimator=sklearn_estimator,
        inputs={
            "train": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-sts-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="stsEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateSTSModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation",
                             source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # setup model quality monitoring baseline data
    script_process_baseline_data = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/baseline",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_process_baseline_data = ProcessingStep(
        name="SetupMonitoringData",
        processor=script_process_baseline_data,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/validation",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="validate",
                             source="/opt/ml/processing/validate"),
        ],
        code=os.path.join(BASE_DIR, "baseline.py"))
    # ---

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    step_register = RegisterModel(
        name="RegisterSTSModel",
        estimator=sklearn_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(step=step_eval,
                     property_file=evaluation_report,
                     json_path="regression_metrics.mse.value"),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSESTSEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register, step_process_baseline_data],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_preprocess, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
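A sketch of driver code that registers and runs the returned pipeline; the region and role ARN are placeholders, not values from the original:

# Hypothetical driver: upsert the pipeline definition, then start a run.
pipeline = get_pipeline(region="us-east-1", sagemaker_session=None)  # session is rebuilt inside
pipeline.upsert(role_arn="arn:aws:iam::<account-id>:role/SageMakerRole")
execution = pipeline.start()
execution.wait()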
Example #20
from sagemaker.sklearn import SKLearn
# Initialise SDK
sklearn_estimator = SKLearn(
    entry_point='train_and_deploy.py',
    role='arn:aws:iam::<your-sagemaker-role>',
    # train_instance_type='ml.m4.xlarge',
    train_instance_type='local',
    output_path='s3://<path-to-output-dir>/',
    hyperparameters={
        'sagemaker_submit_directory':
        's3://<path-to-sagemaker_submit_directory>'
    },
    code_location='s3://<path-to-code_location>',
    framework_version='0.20.0')
# Run model training job
sklearn_estimator.fit({'train': 's3://<path-to-training-data-dir>'})
# Deploy trained model to an endpoint
predictor = sklearn_estimator.deploy(
    # instance_type='ml.t2.medium',
    instance_type='local',
    initial_instance_count=1,
    endpoint_name='<your-end-point-name>',
)
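Once the endpoint is up, it can be invoked through the returned predictor and deleted when done. A sketch in which the payload shape is an assumption about what train_and_deploy.py expects:

import numpy as np

# Hypothetical invocation: the feature vector below is illustrative only.
result = predictor.predict(np.array([[0.1, 0.2, 0.3]]))
print(result)

# Delete the endpoint afterwards to avoid ongoing charges.
predictor.delete_endpoint()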