def test_create_model_from_estimator(sagemaker_session, sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=sklearn_version,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir)

    job_name = 'new_name'
    sklearn.fit(inputs='s3://mybucket/train', job_name=job_name)
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
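

# Variant with explicit overrides: a role, model_server_workers count, and
# vpc_config_override passed to create_model() should take precedence over
# the values the estimator was constructed with.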
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    enable_cloudwatch_metrics = 'true'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir,
                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)

    sklearn.fit(inputs='s3://mybucket/train', job_name='new_name')

    new_role = 'role'
    model_server_workers = 2
    vpc_config = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    model = sklearn.create_model(role=new_role,
                                 model_server_workers=model_server_workers,
                                 vpc_config_override=vpc_config)

    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
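

# End-to-end happy path for fit/create_model/deploy. The `strftime` argument
# is presumably injected by a stripped @patch("time.strftime") decorator that
# pins TIMESTAMP, which in turn fixes the SAGEMAKER_SUBMIT_DIRECTORY path
# asserted below.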
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"

    sklearn.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]
    boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}"
    )
    assert model.prepare_container_def(CPU) == {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz".format(
                TIMESTAMP
            ),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # numeric value of logging.INFO
        },
        "Image": expected_image_base.format(sklearn_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    }

    assert "cpu" in model.prepare_container_def(CPU)["Image"]
    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
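

# When image_name points at a custom container, create_model() should carry
# that image through instead of deriving the default scikit-learn image URI.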
def test_create_model_with_custom_image(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    custom_image = 'ubuntu:latest'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      image_name=custom_image,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir)

    sklearn.fit(inputs='s3://mybucket/train', job_name='new_name')
    model = sklearn.create_model()

    assert model.image == custom_image
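

# A fuller variant of the optional-params test from a newer SDK revision:
# besides role, workers, and VPC config, it overrides the serving image,
# entry point, source_dir, dependencies, and model name.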
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    custom_image = "ubuntu:latest"
    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    new_source_dir = "s3://myotherbucket/source"
    dependencies = ["/directory/a", "/directory/b"]
    model_name = "model-name"
    model = sklearn.create_model(
        image=custom_image,
        role=new_role,
        model_server_workers=model_server_workers,
        vpc_config_override=vpc_config,
        entry_point=SERVING_SCRIPT_FILE,
        source_dir=new_source_dir,
        dependencies=dependencies,
        name=model_name,
    )

    assert model.image == custom_image
    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
    assert model.entry_point == SERVING_SCRIPT_FILE
    assert model.source_dir == new_source_dir
    assert model.dependencies == dependencies
    assert model.name == model_name
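

# Same custom-image check, written against the renamed v2 arguments: the
# SageMaker Python SDK 2.x replaced train_instance_type/image_name with
# instance_type/image_uri, and the model exposes image_uri instead of image.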
def test_create_model_with_custom_image(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    custom_image = "ubuntu:latest"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        image_uri=custom_image,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")
    model = sklearn.create_model()

    assert model.image_uri == custom_image
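

# A newer revision of the create-model-from-estimator test. `name_from_base`
# is presumably injected by a stripped @patch decorator on sagemaker's
# name_from_base helper; pinning its return value lets the test assert both
# the resulting model.name and that the name is seeded from base_job_name
# rather than from the training job's name.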
def test_create_model_from_estimator(name_from_base, sagemaker_session,
                                     sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    base_job_name = "job"

    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        framework_version=sklearn_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name=base_job_name,
        source_dir=source_dir,
        enable_network_isolation=True,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    model_name = "model_name"
    name_from_base.return_value = model_name
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == model_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
    assert model.enable_network_isolation()

    name_from_base.assert_called_with(base_job_name)
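

# ---------------------------------------------------------------------------
# A minimal sketch of the scaffolding these excerpts assume. The wiring below
# is hypothetical (the real suite, tests/unit/test_sklearn.py in the SageMaker
# Python SDK, defines its own constants, fixtures, and the _create_train_job()
# helper, which is omitted here), but "dummy_script.py", region "us-west-2",
# "mybucket", and "s3://m/m.tar.gz" must line up with the values asserted
# above.
# ---------------------------------------------------------------------------
from unittest.mock import Mock

import pytest

from sagemaker.sklearn import SKLearn, SKLearnPredictor

SCRIPT_PATH = "dummy_script.py"
SERVING_SCRIPT_FILE = "another_dummy_script.py"  # hypothetical stand-in
ROLE = "Dummy"  # hypothetical IAM role name
INSTANCE_TYPE = "ml.c4.4xlarge"  # hypothetical training instance type
CPU = "ml.c4.xlarge"  # hypothetical hosting instance type
PYTHON_VERSION = "py3"
TIMESTAMP = "2017-11-06-14:14:15.672"  # assumed pinned via @patch on strftime
EXPERIMENT_CONFIG = {  # hypothetical experiment config
    "ExperimentName": "exp",
    "TrialName": "trial",
    "TrialComponentDisplayName": "tc",
}


@pytest.fixture()
def sklearn_version():
    return "0.20.0"  # hypothetical; the real suite parametrizes this


@pytest.fixture()
def sagemaker_session():
    boto_mock = Mock(name="boto_session", region_name="us-west-2")
    session = Mock(
        name="sagemaker_session",
        boto_session=boto_mock,
        boto_region_name="us-west-2",
        config=None,
        local_mode=False,
    )
    # Children assigned explicitly (rather than auto-created by attribute
    # access) are not attached to the parent mock, so calls to them stay out
    # of session.method_calls and the ["train", "logs_for_job"] check holds.
    session.default_bucket = Mock(name="default_bucket", return_value="mybucket")
    session.expand_role = Mock(name="expand_role", return_value=ROLE)
    session.sagemaker_client.describe_training_job = Mock(
        return_value={"ModelArtifacts": {"S3ModelArtifacts": "s3://m/m.tar.gz"}}
    )
    return session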