예제 #1
0
def test_inference_pipeline_batch_transform(sagemaker_session):
    """Run a SparkML -> XGBoost inference pipeline as a batch transform job."""
    batch_job_name = 'test-inference-pipeline-batch-{}'.format(
        sagemaker_timestamp())

    # Stage 1: SparkML preprocessing model.
    spark_artifact = sagemaker_session.upload_data(
        path=os.path.join(SPARKML_DATA_PATH, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model')
    sparkml_model = SparkMLModel(
        model_data=spark_artifact,
        env={'SAGEMAKER_SPARKML_SCHEMA': SCHEMA},
        sagemaker_session=sagemaker_session)

    # Stage 2: XGBoost predictor using the first-party algorithm image.
    xgb_artifact = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model')
    xgb_model = Model(
        model_data=xgb_artifact,
        image=get_image_uri(sagemaker_session.boto_region_name, 'xgboost'),
        sagemaker_session=sagemaker_session)

    pipeline = PipelineModel(
        models=[sparkml_model, xgb_model],
        role='SageMakerRole',
        sagemaker_session=sagemaker_session,
        name=batch_job_name)
    transformer = pipeline.transformer(1, 'ml.m4.xlarge')
    transform_input = transformer.sagemaker_session.upload_data(
        path=VALID_DATA_PATH,
        key_prefix='integ-test-data/sparkml_xgboost/transform')

    # Launch the job inside the cleanup guard so the model is always deleted.
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.transform(transform_input,
                              content_type=CONTENT_TYPE_CSV,
                              job_name=batch_job_name)
        transformer.wait()
def test_transform_byo_estimator(sagemaker_session, cpu_instance_type):
    """Attach a trained KMeans job as a generic Estimator with network
    isolation enabled, run a tagged transform job, and verify that the
    created model reports EnableNetworkIsolation and carries the tags.
    """
    data_path = os.path.join(DATA_DIR, "one_p_mnist")
    # Python 2 pickles need no encoding hint; Python 3 needs latin1 for numpy arrays.
    pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}
    tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

    # Load the data into memory as numpy arrays
    train_set_path = os.path.join(data_path, "mnist.pkl.gz")
    with gzip.open(train_set_path, "rb") as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        k=10,
        sagemaker_session=sagemaker_session,
        output_path="s3://{}/".format(sagemaker_session.default_bucket()),
    )

    # set kmeans specific hp
    kmeans.init_method = "random"
    # Fixed typo: the KMeans hyperparameter is "max_iterations"; the old
    # "max_iterators" attribute was silently ignored by the estimator.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = "kmeans++"
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])

    job_name = unique_name_from_base("test-kmeans-attach")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        kmeans.fit(records, job_name=job_name)

    estimator = Estimator.attach(training_job_name=job_name, sagemaker_session=sagemaker_session)
    # NOTE(review): reaches into a private attribute to force network
    # isolation on the attached estimator.
    estimator._enable_network_isolation = True

    transform_input_path = os.path.join(data_path, "transform_input.csv")
    transform_input_key_prefix = "integ-test-data/one_p_mnist/transform"
    transform_input = kmeans.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix
    )

    transformer = estimator.transformer(1, cpu_instance_type, tags=tags)
    transformer.transform(transform_input, content_type="text/csv")

    with timeout_and_delete_model_with_transformer(
        transformer, sagemaker_session, minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES
    ):
        transformer.wait()
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name
        )
        assert model_desc["EnableNetworkIsolation"]

        model_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=model_desc["ModelArn"]
        )["Tags"]
        assert tags == model_tags
예제 #3
0
def test_transform_mxnet(mxnet_estimator, mxnet_transform_input,
                         sagemaker_session, cpu_instance_type):
    """Transform with a KMS volume key and identity data-processing filters,
    then check all three settings in the job description."""
    kms_key_arn = get_or_create_kms_key(sagemaker_session)
    # "$" is the JSONPath identity filter: records pass through unchanged.
    input_filter = output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        mxnet_estimator, mxnet_transform_input, cpu_instance_type, kms_key_arn,
        input_filter=input_filter, output_filter=output_filter,
        join_source=None)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    job_desc = transformer.sagemaker_session.describe_transform_job(
        job_name=transformer.latest_transform_job.name)
    assert job_desc["TransformResources"]["VolumeKmsKeyId"] == kms_key_arn
    assert job_desc["DataProcessing"]["OutputFilter"] == output_filter
    assert job_desc["DataProcessing"]["InputFilter"] == input_filter
def test_transform_pytorch_vpc_custom_model_bucket(
    sagemaker_session,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
    cpu_instance_type,
    custom_bucket_name,
):
    """Deploy a PyTorch model into a VPC with artifacts in a custom bucket and
    verify the created model's VPC config and model-data location."""
    data_dir = os.path.join(DATA_DIR, "pytorch_mnist")

    subnet_ids, security_group_id = get_or_create_vpc_resources(
        sagemaker_session.boto_session.client("ec2"))

    model_data = sagemaker_session.upload_data(
        path=os.path.join(data_dir, "model.tar.gz"),
        bucket=custom_bucket_name,
        key_prefix="integ-test-data/pytorch_mnist/model",
    )

    model = PyTorchModel(
        model_data=model_data,
        entry_point=os.path.join(data_dir, "mnist.py"),
        role="SageMakerRole",
        framework_version=pytorch_inference_latest_version,
        py_version=pytorch_inference_latest_py_version,
        sagemaker_session=sagemaker_session,
        vpc_config={"Subnets": subnet_ids,
                    "SecurityGroupIds": [security_group_id]},
        code_location="s3://{}".format(custom_bucket_name),
    )

    transform_input = sagemaker_session.upload_data(
        path=os.path.join(data_dir, "transform", "data.npy"),
        key_prefix="integ-test-data/pytorch_mnist/transform",
    )

    transformer = model.transformer(1, cpu_instance_type)
    transformer.transform(
        transform_input,
        content_type="application/x-npy",
        job_name=unique_name_from_base("test-transform-vpc"),
    )

    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(model_desc["VpcConfig"]["Subnets"]) == set(subnet_ids)
        assert model_desc["VpcConfig"]["SecurityGroupIds"] == [security_group_id]

        # The model artifact must live in the custom bucket, not the default one.
        model_bucket, _ = s3.parse_s3_url(
            model_desc["PrimaryContainer"]["ModelDataUrl"])
        assert model_bucket == custom_bucket_name
예제 #5
0
def test_transform_byo_estimator(sagemaker_session):
    """Train KMeans, attach it as a generic Estimator, run a tagged transform
    job, and verify the tags propagate to the created model."""
    data_path = os.path.join(DATA_DIR, 'one_p_mnist')
    # Python 2 pickles need no encoding hint; Python 3 needs latin1 for numpy arrays.
    pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
    tags = [{'Key': 'some-tag', 'Value': 'value-for-tag'}]

    # Load the data into memory as numpy arrays
    train_set_path = os.path.join(data_path, 'mnist.pkl.gz')
    with gzip.open(train_set_path, 'rb') as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(role='SageMakerRole',
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10,
                    sagemaker_session=sagemaker_session,
                    output_path='s3://{}/'.format(
                        sagemaker_session.default_bucket()))

    # set kmeans specific hp
    kmeans.init_method = 'random'
    # Fixed typo: the KMeans hyperparameter is 'max_iterations'; the old
    # 'max_iterators' attribute was silently ignored by the estimator.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = 'kmeans++'
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])

    job_name = unique_name_from_base('test-kmeans-attach')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        kmeans.fit(records, job_name=job_name)

    transform_input_path = os.path.join(data_path, 'transform_input.csv')
    transform_input_key_prefix = 'integ-test-data/one_p_mnist/transform'
    transform_input = kmeans.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    estimator = Estimator.attach(training_job_name=job_name,
                                 sagemaker_session=sagemaker_session)

    transformer = estimator.transformer(1, 'ml.m4.xlarge', tags=tags)
    transformer.transform(transform_input, content_type='text/csv')

    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        model_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=model_desc['ModelArn'])['Tags']
        assert tags == model_tags
def test_transform_mxnet(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train an MXNet MNIST model, then run a transform job with a KMS volume
    key and identity filters, verifying all three in the job description."""
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")

    estimator = MXNet(
        entry_point=os.path.join(data_path, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    channels = {
        "train": estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train"),
        "test": estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test"),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels, job_name=unique_name_from_base("test-mxnet-transform"))

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(data_path, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform")

    kms_key_arn = get_or_create_kms_key(sagemaker_session)
    # "$" is the JSONPath identity filter: records pass through unchanged.
    input_filter = output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        estimator,
        transform_input,
        cpu_instance_type,
        kms_key_arn,
        input_filter=input_filter,
        output_filter=output_filter,
        join_source=None,
    )
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    job_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert job_desc["TransformResources"]["VolumeKmsKeyId"] == kms_key_arn
    assert job_desc["DataProcessing"]["OutputFilter"] == output_filter
    assert job_desc["DataProcessing"]["InputFilter"] == input_filter
def test_transform_mxnet_logs(
    mxnet_estimator, mxnet_transform_input, sagemaker_session, cpu_instance_type
):
    """Exercise the wait=True / logs=True path of transform-job creation."""
    with timeout(minutes=45):
        transformer = _create_transformer_and_transform_job(
            mxnet_estimator,
            mxnet_transform_input,
            cpu_instance_type,
            wait=True,
            logs=True,
        )

    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
def test_timeout_and_delete_model_with_transformer_does_not_throw_when_method_ends_gracefully(
        _show_logs, _cleanup_logs, session, transformer):
    """A body that finishes before the deadline must exit cleanly while the
    model still gets deleted exactly once."""
    guard = timeout_and_delete_model_with_transformer(
        sagemaker_session=session,
        transformer=transformer,
        hours=0,
        minutes=0,
        seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED,
        sleep_between_cleanup_attempts=0,
    )
    with guard:
        pass
    assert 1 == transformer.delete_model.call_count
def test_timeout_and_delete_model_with_transformer_throws_timeout_exception_when_method_times_out(
        _show_logs, _cleanup_logs, session, transformer):
    """Sleeping past a deliberately tiny deadline must raise stopit's
    TimeoutException."""
    with pytest.raises(stopit.utils.TimeoutException):
        guard = timeout_and_delete_model_with_transformer(
            sagemaker_session=session,
            transformer=transformer,
            hours=0,
            minutes=0,
            seconds=SHORT_TIMEOUT_TO_FORCE_TIMEOUT_TO_OCCUR,
            sleep_between_cleanup_attempts=0,
        )
        with guard:
            time.sleep(LONG_DURATION_TO_EXCEED_TIMEOUT)
예제 #10
0
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version):
    """Train an MXNet model inside a VPC, then transform, verifying that the
    training job and the created model both carry the VPC configuration."""
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")

    ec2 = sagemaker_session.boto_session.client("ec2")
    subnet_ids, security_group_id = get_or_create_vpc_resources(
        ec2, sagemaker_session.boto_session.region_name)

    mx = MXNet(
        entry_point=os.path.join(data_path, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
    )

    channels = {
        "train": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train"),
        "test": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test"),
    }
    job_name = unique_name_from_base("test-mxnet-vpc")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels, job_name=job_name)

    # The training job itself must have run inside the VPC.
    job_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=mx.latest_training_job.name)
    assert set(job_desc["VpcConfig"]["Subnets"]) == set(subnet_ids)
    assert job_desc["VpcConfig"]["SecurityGroupIds"] == [security_group_id]

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform")

    transformer = _create_transformer_and_transform_job(mx, transform_input)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model created for the transform inherits the VPC settings.
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(model_desc["VpcConfig"]["Subnets"]) == set(subnet_ids)
        assert model_desc["VpcConfig"]["SecurityGroupIds"] == [security_group_id]
def test_timeout_and_delete_model_with_transformer_fails_when_method_throws_exception(
        _show_logs, _cleanup_logs, session, transformer):
    """The context manager must propagate an exception raised in its body,
    preserve the message, and still delete the model exactly once."""
    with pytest.raises(ValueError) as exception:
        with timeout_and_delete_model_with_transformer(
                sagemaker_session=session,
                transformer=transformer,
                hours=0,
                minutes=1,
                sleep_between_cleanup_attempts=0,
        ):
            raise ValueError(EXCEPTION_MESSAGE)
    # Bug fix: this assertion used to sit inside the pytest.raises block after
    # the raise, making it unreachable; it must run after the block exits.
    assert EXCEPTION_MESSAGE in str(exception.value)
    assert transformer.delete_model.call_count == 1
예제 #12
0
def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type):
    """Train KMeans, start a transform job, then attach a fresh Transformer to
    the running job by name and wait on the attached copy."""
    data_path = os.path.join(DATA_DIR, "one_p_mnist")
    # Python 2 pickles need no encoding hint; Python 3 needs latin1 for numpy arrays.
    pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

    # Load the data into memory as numpy arrays
    train_set_path = os.path.join(data_path, "mnist.pkl.gz")
    with gzip.open(train_set_path, "rb") as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        k=10,
        sagemaker_session=sagemaker_session,
        output_path="s3://{}/".format(sagemaker_session.default_bucket()),
    )

    # set kmeans specific hp
    kmeans.init_method = "random"
    # Fixed typo: the KMeans hyperparameter is "max_iterations"; the old
    # "max_iterators" attribute was silently ignored by the estimator.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = "kmeans++"
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])

    job_name = unique_name_from_base("test-kmeans-attach")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        kmeans.fit(records, job_name=job_name)

    transform_input_path = os.path.join(data_path, "transform_input.csv")
    transform_input_key_prefix = "integ-test-data/one_p_mnist/transform"
    transform_input = kmeans.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    transformer = _create_transformer_and_transform_job(
        kmeans, transform_input, cpu_instance_type)

    attached_transformer = Transformer.attach(
        transformer.latest_transform_job.name,
        sagemaker_session=sagemaker_session)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        attached_transformer.wait()
def test_single_transformer_multiple_jobs(sagemaker_session,
                                          mxnet_full_version,
                                          cpu_instance_type):
    """One Transformer instance must be reusable across transform jobs, with
    output_path tracking each job's name."""
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")

    mx = MXNet(
        entry_point=os.path.join(data_path, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    channels = {
        "train": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train"),
        "test": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test"),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels, job_name=unique_name_from_base("test-mxnet-transform"))

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform")

    transformer = mx.transformer(1, cpu_instance_type)

    job_name = unique_name_from_base("test-mxnet-transform")
    transformer.transform(transform_input,
                          content_type="text/csv",
                          job_name=job_name)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        expected = "s3://{}/{}".format(
            sagemaker_session.default_bucket(), job_name)
        assert transformer.output_path == expected

        # A second job on the same transformer moves the output path along.
        job_name = unique_name_from_base("test-mxnet-transform")
        transformer.transform(transform_input,
                              content_type="text/csv",
                              job_name=job_name)
        expected = "s3://{}/{}".format(
            sagemaker_session.default_bucket(), job_name)
        assert transformer.output_path == expected
예제 #14
0
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version):
    """Train and transform inside a VPC; both the training job and the model
    created for the transform must report the VPC configuration."""
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

    subnet_ids, security_group_id = get_or_create_vpc_resources(
        sagemaker_session.boto_session.client('ec2'),
        sagemaker_session.boto_session.region_name)

    mx = MXNet(entry_point=os.path.join(data_path, 'mnist.py'),
               role='SageMakerRole',
               train_instance_count=1,
               train_instance_type='ml.c4.xlarge',
               sagemaker_session=sagemaker_session,
               framework_version=mxnet_full_version,
               subnets=subnet_ids,
               security_group_ids=[security_group_id])

    channels = {
        'train': mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels)

    # The training job itself must have run inside the VPC.
    job_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=mx.latest_training_job.name)
    assert set(job_desc['VpcConfig']['Subnets']) == set(subnet_ids)
    assert job_desc['VpcConfig']['SecurityGroupIds'] == [security_group_id]

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    transformer = _create_transformer_and_transform_job(mx, transform_input)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model created for the transform inherits the VPC settings.
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(model_desc['VpcConfig']['Subnets']) == set(subnet_ids)
        assert model_desc['VpcConfig']['SecurityGroupIds'] == [security_group_id]
예제 #15
0
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MXNet MNIST, then transform with a KMS volume key and an output
    filter only; verify both in the transform-job description."""
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

    mx = MXNet(entry_point=os.path.join(data_path, 'mnist.py'),
               role='SageMakerRole',
               train_instance_count=1,
               train_instance_type='ml.c4.xlarge',
               sagemaker_session=sagemaker_session,
               framework_version=mxnet_full_version)

    channels = {
        'train': mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels, job_name=unique_name_from_base('test-mxnet-transform'))

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    kms_key_arn = get_or_create_kms_key(sagemaker_session)
    output_filter = "$"  # JSONPath identity filter

    transformer = _create_transformer_and_transform_job(
        mx,
        transform_input,
        kms_key_arn,
        input_filter=None,
        output_filter=output_filter,
        join_source=None)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    job_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert job_desc['TransformResources']['VolumeKmsKeyId'] == kms_key_arn
    assert job_desc['DataProcessing']['OutputFilter'] == output_filter
예제 #16
0
def test_attach_transform_kmeans(sagemaker_session):
    """Train KMeans, start a transform job, attach a new Transformer to it by
    name, and wait on the attached copy."""
    data_path = os.path.join(DATA_DIR, 'one_p_mnist')
    # Python 2 pickles need no encoding hint; Python 3 needs latin1 for numpy arrays.
    pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

    # Load the data into memory as numpy arrays
    train_set_path = os.path.join(data_path, 'mnist.pkl.gz')
    with gzip.open(train_set_path, 'rb') as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(role='SageMakerRole',
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10,
                    sagemaker_session=sagemaker_session,
                    output_path='s3://{}/'.format(
                        sagemaker_session.default_bucket()))

    # set kmeans specific hp
    kmeans.init_method = 'random'
    # Fixed typo: the KMeans hyperparameter is 'max_iterations'; the old
    # 'max_iterators' attribute was silently ignored by the estimator.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = 'kmeans++'
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        kmeans.fit(records)

    transform_input_path = os.path.join(data_path, 'transform_input.csv')
    transform_input_key_prefix = 'integ-test-data/one_p_mnist/transform'
    transform_input = kmeans.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    transformer = _create_transformer_and_transform_job(
        kmeans, transform_input)

    attached_transformer = Transformer.attach(
        transformer.latest_transform_job.name,
        sagemaker_session=sagemaker_session)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        attached_transformer.wait()
예제 #17
0
def test_transform_mxnet_tags(mxnet_estimator, mxnet_transform_input,
                              sagemaker_session, cpu_instance_type):
    """Tags passed to transformer() must be propagated to the created model."""
    tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

    transformer = mxnet_estimator.transformer(1, cpu_instance_type, tags=tags)
    transformer.transform(mxnet_transform_input, content_type="text/csv")

    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        actual_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=model_desc["ModelArn"])["Tags"]
        assert actual_tags == tags
def test_transform_mxnet_tags(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train an MXNet model, then run a tagged transform job and verify the
    tags land on the created model."""
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")
    tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

    mx = MXNet(
        entry_point=os.path.join(data_path, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    channels = {
        "train": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train"),
        "test": mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test"),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels, job_name=unique_name_from_base("test-mxnet-transform"))

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform")

    transformer = mx.transformer(1, cpu_instance_type, tags=tags)
    transformer.transform(transform_input, content_type="text/csv")

    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        actual_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=model_desc["ModelArn"])["Tags"]
        assert actual_tags == tags
def test_transform_model_client_config(
    mxnet_estimator, mxnet_transform_input, sagemaker_session, cpu_instance_type
):
    """model_client_config passed to transform() must appear verbatim in the
    transform job description."""
    model_client_config = {"InvocationsTimeoutInSeconds": 60, "InvocationsMaxRetries": 2}
    transformer = mxnet_estimator.transformer(1, cpu_instance_type)
    transformer.transform(
        mxnet_transform_input,
        content_type="text/csv",
        model_client_config=model_client_config,
    )

    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        transform_job_desc = sagemaker_session.sagemaker_client.describe_transform_job(
            TransformJobName=transformer.latest_transform_job.name)

        assert transform_job_desc["ModelClientConfig"] == model_client_config
def test_single_transformer_multiple_jobs(
    mxnet_estimator, mxnet_transform_input, sagemaker_session, cpu_instance_type
):
    """A single Transformer must support back-to-back jobs, with output_path
    updated to match each job name."""
    transformer = mxnet_estimator.transformer(1, cpu_instance_type)

    job_name = unique_name_from_base("test-mxnet-transform")
    transformer.transform(mxnet_transform_input, content_type="text/csv", job_name=job_name)
    with timeout_and_delete_model_with_transformer(
        transformer, sagemaker_session, minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES
    ):
        expected = "s3://{}/{}".format(sagemaker_session.default_bucket(), job_name)
        assert transformer.output_path == expected

        # A second job on the same transformer moves the output path along.
        job_name = unique_name_from_base("test-mxnet-transform")
        transformer.transform(mxnet_transform_input, content_type="text/csv", job_name=job_name)
        expected = "s3://{}/{}".format(sagemaker_session.default_bucket(), job_name)
        assert transformer.output_path == expected
def test_inference_pipeline_batch_transform(sagemaker_session,
                                            cpu_instance_type):
    """Batch transform through a SparkML -> XGBoost PipelineModel."""
    batch_job_name = "test-inference-pipeline-batch-{}".format(
        sagemaker_timestamp())

    # SparkML preprocessing stage.
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(SPARKML_DATA_PATH, "mleap_model.tar.gz"),
        key_prefix="integ-test-data/sparkml/model",
    )
    sparkml_model = SparkMLModel(
        model_data=sparkml_model_data,
        env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
        sagemaker_session=sagemaker_session,
    )

    # XGBoost inference stage using the first-party algorithm image.
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )
    xgb_image = image_uris.retrieve("xgboost",
                                    sagemaker_session.boto_region_name,
                                    version="1",
                                    image_scope="inference")
    xgb_model = Model(model_data=xgb_model_data,
                      image_uri=xgb_image,
                      sagemaker_session=sagemaker_session)

    pipeline = PipelineModel(
        models=[sparkml_model, xgb_model],
        role="SageMakerRole",
        sagemaker_session=sagemaker_session,
        name=batch_job_name,
    )
    transformer = pipeline.transformer(1, cpu_instance_type)
    transform_input = transformer.sagemaker_session.upload_data(
        path=VALID_DATA_PATH,
        key_prefix="integ-test-data/sparkml_xgboost/transform")

    # Launch the job inside the cleanup guard so the model is always deleted.
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.transform(transform_input,
                              content_type="text/csv",
                              job_name=batch_job_name)
        transformer.wait()
def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type):
    """Train a KMeans model, start a transform job, then verify that a
    Transformer re-attached by job name can wait on that same job.
    """
    kmeans = KMeans(
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        k=10,
        sagemaker_session=sagemaker_session,
        output_path="s3://{}/".format(sagemaker_session.default_bucket()),
    )

    # set kmeans specific hp
    kmeans.init_method = "random"
    # Fixed: was `kmeans.max_iterators`, a typo that set an unused plain
    # attribute; the actual KMeans hyperparameter is `max_iterations`, so
    # the intended value of 1 never reached the training job.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = "kmeans++"
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    # Train on a small 100-sample slice to keep the job fast.
    records = kmeans.record_set(datasets.one_p_mnist()[0][:100])

    job_name = unique_name_from_base("test-kmeans-attach")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        kmeans.fit(records, job_name=job_name)

    transform_input_path = os.path.join(DATA_DIR, "one_p_mnist",
                                        "transform_input.csv")
    transform_input_key_prefix = "integ-test-data/one_p_mnist/transform"
    transform_input = kmeans.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    transformer = _create_transformer_and_transform_job(
        kmeans, transform_input, cpu_instance_type)

    # Re-attach to the in-flight job by name and wait on the attached copy;
    # cleanup deletes the model created by the original transformer.
    attached_transformer = Transformer.attach(
        transformer.latest_transform_job.name,
        sagemaker_session=sagemaker_session)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        attached_transformer.wait()
# Example #23 — extraction artifact from the source listing ("0" appears to be a vote count)
def test_transform_mxnet_logs(sagemaker_session, mxnet_full_version,
                              cpu_instance_type):
    """Train an MXNet MNIST estimator, then run a transform job with log
    streaming enabled (wait=True, logs=True) and wait for completion."""
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

    estimator = MXNet(
        entry_point=os.path.join(mnist_dir, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    train_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train",
    )
    test_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test",
    )

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(
            {"train": train_input, "test": test_input},
            job_name=unique_name_from_base("test-mxnet-transform"),
        )

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform",
    )

    # wait=True / logs=True exercises the log-streaming code path.
    with timeout(minutes=45):
        transformer = _create_transformer_and_transform_job(
            estimator, transform_input, cpu_instance_type,
            wait=True, logs=True)

    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
# Example #24 — extraction artifact from the source listing ("0" appears to be a vote count)
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train an MXNet MNIST model, run a transform job whose ML storage
    volume is encrypted with an account-scoped KMS key, and verify the key
    is recorded on the job description."""
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(
        entry_point=os.path.join(mnist_dir, 'mnist.py'),
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    train_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({'train': train_input, 'test': test_input})

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    # Resolve (or create) a KMS key owned by this account for volume
    # encryption during the transform job.
    boto_session = sagemaker_session.boto_session
    account_id = boto_session.client('sts').get_caller_identity()['Account']
    kms_key_arn = get_or_create_kms_key(boto_session.client('kms'),
                                        account_id)

    transformer = _create_transformer_and_transform_job(
        estimator, transform_input, kms_key_arn)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    job_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert kms_key_arn == job_desc['TransformResources']['VolumeKmsKeyId']
# Example #25 — extraction artifact from the source listing ("0" appears to be a vote count)
def test_timeout_and_delete_model_with_transformer_retries_resource_deletion_on_failure(
    _show_logs, _cleanup_logs, _delete_schedules_associated_with_endpoint, session, transformer
):
    """Cleanup retries model deletion: when delete_model keeps raising, the
    context manager attempts the delete exactly three times before giving up.
    """
    deletion_failure = ClientError(
        error_response={"Error": {"Code": 403, "Message": "ValidationException"}},
        operation_name="Unit Test",
    )
    transformer.delete_model = Mock(side_effect=deletion_failure)

    with timeout_and_delete_model_with_transformer(
        sagemaker_session=session,
        transformer=transformer,
        hours=0,
        minutes=0,
        seconds=LONG_TIMEOUT_THAT_WILL_NEVER_BE_EXCEEDED,
        sleep_between_cleanup_attempts=0,
    ):
        pass

    assert transformer.delete_model.call_count == 3
# Example #26 — extraction artifact from the source listing ("0" appears to be a vote count)
def test_transform_mxnet_tags(sagemaker_session, mxnet_full_version):
    """Train an MXNet MNIST model, run a transform job with tags attached,
    and verify the tags propagate to the created SageMaker model."""
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    expected_tags = [{'Key': 'some-tag', 'Value': 'value-for-tag'}]

    estimator = MXNet(
        entry_point=os.path.join(mnist_dir, 'mnist.py'),
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    train_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(
            {'train': train_input, 'test': test_input},
            job_name=unique_name_from_base('test-mxnet-transform'),
        )

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    transformer = estimator.transformer(1, 'ml.m4.xlarge',
                                        tags=expected_tags)
    transformer.transform(transform_input, content_type='text/csv')

    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The tags passed to transformer() should land on the model resource.
        sm_client = sagemaker_session.sagemaker_client
        model_arn = sm_client.describe_model(
            ModelName=transformer.model_name)['ModelArn']
        assert expected_tags == sm_client.list_tags(
            ResourceArn=model_arn)['Tags']
# Example #27 — extraction artifact from the source listing ("0" appears to be a vote count)
def test_transform_tf_kms_network_isolation(sagemaker_session,
                                            cpu_instance_type, tmpdir,
                                            tf_full_version, py_version):
    """Train a TensorFlow MNIST model, then run a batch transform with
    KMS-encrypted output/volume and network isolation enabled.

    Verifies that: the created model has network isolation on; the job
    description records the output path and both KMS keys; and the
    (decrypted) output downloaded locally contains valid predictions.
    """
    data_path = os.path.join(DATA_DIR, "tensorflow_mnist")

    tf = TensorFlow(
        entry_point=os.path.join(data_path, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        framework_version=tf_full_version,
        script_mode=True,
        py_version=py_version,
        sagemaker_session=sagemaker_session,
    )

    s3_prefix = "integ-test-data/tf-scriptmode/mnist"
    training_input = sagemaker_session.upload_data(
        path=os.path.join(data_path, "data"),
        key_prefix="{}/training".format(s3_prefix))

    # job_name is reused for both the training job and the transform job.
    job_name = unique_name_from_base("test-tf-transform")
    tf.fit(inputs=training_input, job_name=job_name)

    transform_input = sagemaker_session.upload_data(
        path=os.path.join(data_path, "transform"),
        key_prefix="{}/transform".format(s3_prefix))

    # bucket_with_encryption yields a KMS-encrypted bucket plus its key;
    # the same key encrypts both the output objects and the ML volume.
    with bucket_with_encryption(sagemaker_session,
                                "SageMakerRole") as (bucket_with_kms, kms_key):
        output_path = "{}/{}/output".format(bucket_with_kms, job_name)

        transformer = tf.transformer(
            instance_count=1,
            instance_type=cpu_instance_type,
            output_path=output_path,
            output_kms_key=kms_key,
            volume_kms_key=kms_key,
            enable_network_isolation=True,
        )

        with timeout_and_delete_model_with_transformer(
                transformer,
                sagemaker_session,
                minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
            transformer.transform(transform_input,
                                  job_name=job_name,
                                  content_type="text/csv",
                                  wait=True)

            # Network isolation must be recorded on the model resource.
            model_desc = sagemaker_session.sagemaker_client.describe_model(
                ModelName=transformer.model_name)
            assert model_desc["EnableNetworkIsolation"]

        # The job description should echo back the output location and keys.
        job_desc = sagemaker_session.describe_transform_job(job_name=job_name)
        assert job_desc["TransformOutput"]["S3OutputPath"] == output_path
        assert job_desc["TransformOutput"]["KmsKeyId"] == kms_key
        assert job_desc["TransformResources"]["VolumeKmsKeyId"] == kms_key

        # Download the (server-side decrypted) output and sanity-check the
        # prediction payload shape.
        s3.S3Downloader.download(
            s3_uri=output_path,
            local_path=os.path.join(tmpdir, "tf-batch-output"),
            session=sagemaker_session,
        )

        with open(os.path.join(tmpdir, "tf-batch-output",
                               "data.csv.out")) as f:
            result = json.load(f)
            # 10 class probabilities per MNIST digit; expected class is 1.
            assert len(result["predictions"][0]["probabilities"]) == 10
            assert result["predictions"][0]["classes"] == 1