def test_fit_ndarray(time, sagemaker_session):
    """Fit PCA on ndarray data; each matrix shard plus the manifest is uploaded to S3.

    With 3 training instances the record set is split into 3 protobuf parts,
    so 4 S3 objects (3 parts + .amazon.manifest) must be written.
    """
    mock_s3 = Mock()
    mock_object = Mock()
    mock_s3.Object = Mock(return_value=mock_object)
    sagemaker_session.boto_session.resource = Mock(return_value=mock_s3)

    kwargs = dict(COMMON_ARGS)
    kwargs["train_instance_count"] = 3
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session,
              data_location="s3://{}/key-prefix/".format(BUCKET_NAME), **kwargs)

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    pca.fit(pca.record_set(np.array(train), np.array(labels)))

    # NOTE: the original chained .format(TIMESTAMP) onto templates that contain
    # no '{}' placeholders -- a dead no-op. The literal timestamp below is what
    # the mocked `time` fixture produces, so assert against it directly.
    mock_s3.Object.assert_any_call(
        BUCKET_NAME, "key-prefix/PCA-2017-11-06-14:14:15.671/matrix_0.pbr")
    mock_s3.Object.assert_any_call(
        BUCKET_NAME, "key-prefix/PCA-2017-11-06-14:14:15.671/matrix_1.pbr")
    mock_s3.Object.assert_any_call(
        BUCKET_NAME, "key-prefix/PCA-2017-11-06-14:14:15.671/matrix_2.pbr")
    mock_s3.Object.assert_any_call(
        BUCKET_NAME, "key-prefix/PCA-2017-11-06-14:14:15.671/.amazon.manifest")
    assert mock_object.put.call_count == 4
def test_model_image(sagemaker_session):
    """create_model() after fit should use the regional PCA container image."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')
    estimator.fit(records, MINI_BATCH_SIZE)

    model = estimator.create_model()

    assert model.image == registry(REGION, 'pca') + '/pca:1'
def test_init_all_pca_hyperparameters(sagemaker_session):
    """All PCA-specific hyperparameters passed at init are stored verbatim."""
    estimator = PCA(num_components=55, algorithm_mode='randomized',
                    subtract_mean=True, extra_components=33,
                    sagemaker_session=sagemaker_session, **COMMON_ARGS)

    assert estimator.num_components == 55
    assert estimator.algorithm_mode == 'randomized'
    assert estimator.extra_components == 33
def test_init_enable_network_isolation(sagemaker_session):
    """The enable_network_isolation flag set at init is reported by the accessor."""
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session,
                    enable_network_isolation=True, **COMMON_ARGS)

    assert estimator.num_components == 55
    assert estimator.enable_network_isolation() is True
def test_fit_pca(sagemaker_session, tuner):
    """Tuning a PCA estimator forwards static hyperparameters, ranges, tags and
    mini_batch_size to the session's tune call."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    base_job_name='pca', sagemaker_session=sagemaker_session)
    estimator.algorithm_mode = 'randomized'
    estimator.subtract_mean = True
    estimator.extra_components = 5

    tuner.estimator = estimator
    tags = [{'Name': 'some-tag-without-a-value'}]
    tuner.tags = tags
    tuner._hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized']),
    }

    record_set = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(record_set, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]

    # Tuned hyperparameters are excluded from the static set.
    assert len(tune_kwargs['static_hyperparameters']) == 4
    assert tune_kwargs['static_hyperparameters']['extra_components'] == '5'
    assert len(tune_kwargs['parameter_ranges']['IntegerParameterRanges']) == 1
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['tags'] == tags
    assert tuner.estimator.mini_batch_size == 9999
def test_init_required_named(sagemaker_session):
    """Required args supplied as keywords are stored on the estimator."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert estimator.role == COMMON_TRAIN_ARGS["role"]
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == COMMON_TRAIN_ARGS["train_instance_type"]
    assert estimator.num_components == ALL_REQ_ARGS["num_components"]
def test_prepare_for_training_with_amazon_estimator(tuner, sagemaker_session):
    """First-party (Amazon) estimators must not add framework-only static
    hyperparameters when the tuner prepares for training."""
    tuner.estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE,
                          NUM_COMPONENTS, sagemaker_session=sagemaker_session)

    tuner._prepare_for_training()

    for framework_only_key in ('sagemaker_estimator_class_name',
                               'sagemaker_estimator_module'):
        assert framework_only_key not in tuner.static_hyperparameters
def test_init_estimator_args(sagemaker_session):
    """Generic EstimatorBase kwargs (train_max_run, data_location, ...) pass
    through PCA.__init__ unchanged."""
    estimator = PCA(num_components=1, train_max_run=1234,
                    sagemaker_session=sagemaker_session,
                    data_location='s3://some-bucket/some-key/', **COMMON_ARGS)

    assert estimator.train_instance_type == COMMON_ARGS['train_instance_type']
    assert estimator.train_instance_count == COMMON_ARGS['train_instance_count']
    assert estimator.role == COMMON_ARGS['role']
    assert estimator.train_max_run == 1234
    assert estimator.data_location == 's3://some-bucket/some-key/'
def test_predictor_type(sagemaker_session):
    """Deploying the model created from a fitted PCA yields a PCAPredictor."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')
    estimator.fit(records, MINI_BATCH_SIZE)

    predictor = estimator.create_model().deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(predictor, PCAPredictor)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """A non-integer mini_batch_size must be rejected by fit()."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')

    with pytest.raises((TypeError, ValueError)):
        estimator.fit(records, 'some')
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """When mini_batch_size is omitted, _prepare_for_training defaults it to 1."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')

    estimator._prepare_for_training(records)

    assert estimator.mini_batch_size == 1
def test_data_location_does_not_call_default_bucket(sagemaker_session):
    """An explicit data_location must bypass the session's default-bucket lookup."""
    location = "s3://my-bucket/path/"
    estimator = PCA(num_components=2, sagemaker_session=sagemaker_session,
                    data_location=location, **COMMON_ARGS)

    assert estimator.data_location == location
    assert not sagemaker_session.default_bucket.called
def test_all_hyperparameters(sagemaker_session):
    """hyperparameters() serializes every configured value as a string."""
    estimator = PCA(sagemaker_session=sagemaker_session, algorithm_mode='regular',
                    subtract_mean='True', extra_components=1, **ALL_REQ_ARGS)

    expected = {
        'num_components': str(ALL_REQ_ARGS['num_components']),
        'algorithm_mode': 'regular',
        'subtract_mean': 'True',
        'extra_components': '1',
    }
    assert estimator.hyperparameters() == expected
def test_prepare_for_training_multiple_channel(sagemaker_session):
    """Multiple channels are accepted as long as one of them is 'train'."""
    estimator = PCA(base_job_name='lr', sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)
    channel = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')

    estimator._prepare_for_training([channel, channel])

    assert estimator.mini_batch_size == 1
def test_pca_hyperparameters(sagemaker_session):
    """PCA-specific hyperparameters are stringified by hyperparameters()."""
    estimator = PCA(num_components=55, algorithm_mode='randomized',
                    subtract_mean=True, extra_components=33,
                    sagemaker_session=sagemaker_session, **COMMON_ARGS)

    assert estimator.hyperparameters() == {
        'num_components': '55',
        'extra_components': '33',
        'subtract_mean': 'True',
        'algorithm_mode': 'randomized',
    }
def test_init_required_positional(sagemaker_session):
    """Required args supplied positionally are stored on the estimator."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE,
                    NUM_COMPONENTS, sagemaker_session=sagemaker_session)

    assert estimator.role == ROLE
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == TRAIN_INSTANCE_TYPE
    assert estimator.num_components == NUM_COMPONENTS
def test_prepare_for_training_list(sagemaker_session):
    """_prepare_for_training infers feature_dim from a list of record sets."""
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session,
                    **COMMON_ARGS)
    features = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    targets = [99, 85, 87, 2]
    record_sets = [estimator.record_set(np.array(features), np.array(targets))]

    estimator._prepare_for_training(record_sets, mini_batch_size=1)

    # Each training row has 3 features.
    assert estimator.feature_dim == 3
    assert estimator.mini_batch_size == 1
def test_prepare_for_training_multiple_channel_no_train(sagemaker_session):
    """_prepare_for_training must reject channel lists lacking a 'train' channel."""
    lr = PCA(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                     feature_dim=FEATURE_DIM, channel='mock')

    with pytest.raises(ValueError) as ex:
        lr._prepare_for_training([data, data])

    # str(ex) stringifies the ExceptionInfo (file:line repr in pytest >= 5),
    # not the message -- inspect the raised exception itself.
    assert 'Must provide train channel.' in str(ex.value)
def test_call_fit_none_mini_batch_size(sagemaker_session):
    """Calling fit() without a mini_batch_size must succeed."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')

    estimator.fit(records)
def test_validate_parameter_ranges_string_value_validation_error(sagemaker_session):
    """Categorical values outside the hyperparameter's allowed set are rejected
    when the tuner is constructed."""
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)
    invalid_hyperparameter_ranges = {'algorithm_mode': CategoricalParameter([0, 5])}

    with pytest.raises(ValueError) as e:
        HyperparameterTuner(estimator=pca,
                            objective_metric_name=OBJECTIVE_METRIC_NAME,
                            hyperparameter_ranges=invalid_hyperparameter_ranges,
                            metric_definitions=METRIC_DEFINTIONS)

    # str(e) stringifies the ExceptionInfo (file:line repr in pytest >= 5),
    # not the message -- inspect the raised exception itself.
    assert 'Value must be one of "regular" and "randomized"' in str(e.value)
def test_init_estimator_args(sagemaker_session):
    """Generic estimator kwargs (max_run, data_location, ...) pass through
    PCA.__init__ unchanged (v2 argument names)."""
    estimator = PCA(num_components=1, max_run=1234,
                    sagemaker_session=sagemaker_session,
                    data_location="s3://some-bucket/some-key/", **COMMON_ARGS)

    assert estimator.instance_type == COMMON_ARGS["instance_type"]
    assert estimator.instance_count == COMMON_ARGS["instance_count"]
    assert estimator.role == COMMON_ARGS["role"]
    assert estimator.max_run == 1234
    assert estimator.data_location == "s3://some-bucket/some-key/"
def test_validate_parameter_ranges_number_validation_error(sagemaker_session):
    """Integer ranges outside the hyperparameter's valid bounds are rejected
    when the tuner is constructed."""
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)
    invalid_hyperparameter_ranges = {'num_components': IntegerParameter(-1, 2)}

    with pytest.raises(ValueError) as e:
        HyperparameterTuner(estimator=pca,
                            objective_metric_name=OBJECTIVE_METRIC_NAME,
                            hyperparameter_ranges=invalid_hyperparameter_ranges,
                            metric_definitions=METRIC_DEFINTIONS)

    # str(e) stringifies the ExceptionInfo (file:line repr in pytest >= 5),
    # not the message -- inspect the raised exception itself.
    assert 'Value must be an integer greater than zero' in str(e.value)
def test_call_fit(base_fit, sagemaker_session):
    """fit() delegates to the base class fit with the records and batch size."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                        feature_dim=FEATURE_DIM, channel='train')

    estimator.fit(records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    positional = base_fit.call_args[0]
    assert len(positional) == 2
    assert positional[0] == records
    assert positional[1] == MINI_BATCH_SIZE
def test_prepare_for_training_list_no_train_channel(sagemaker_session):
    """_prepare_for_training must reject record-set lists lacking a 'train' channel."""
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)
    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    # Channel is "test", so there is no train channel in the list.
    records = [pca.record_set(np.array(train), np.array(labels), "test")]

    with pytest.raises(ValueError) as ex:
        pca._prepare_for_training(records, mini_batch_size=1)

    # str(ex) stringifies the ExceptionInfo (file:line repr in pytest >= 5),
    # not the message -- inspect the raised exception itself.
    assert "Must provide train channel." in str(ex.value)
def test_all_hyperparameters(sagemaker_session):
    """hyperparameters() serializes every configured value as a string."""
    estimator = PCA(sagemaker_session=sagemaker_session, algorithm_mode="regular",
                    subtract_mean="True", extra_components=1, **ALL_REQ_ARGS)

    expected = {
        "num_components": str(ALL_REQ_ARGS["num_components"]),
        "algorithm_mode": "regular",
        "subtract_mean": "True",
        "extra_components": "1",
    }
    assert estimator.hyperparameters() == expected
def test_model_image(sagemaker_session):
    """create_model() after fit should use the regional PCA image URI (v2 API)."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    model = estimator.create_model()

    assert image_uris.retrieve("pca", REGION) == model.image_uri
def test_image(sagemaker_session):
    """train_image() resolves the per-algorithm ECR image for the region."""
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)
    assert pca.train_image() == registry('us-west-2') + '/pca:1'

    ic = ImageClassification(num_classes=257, num_training_samples=15420, epochs=1,
                             image_shape='3,32,32',
                             sagemaker_session=sagemaker_session, **COMMON_ARGS)
    expected_ic_image = (registry('us-west-2', 'image_classification')
                         + '/image-classification:latest')
    assert ic.train_image() == expected_ic_image
def test_fit_pca_with_inter_container_traffic_encryption_flag(sagemaker_session, tuner):
    """The encrypt_inter_container_traffic flag propagates into the tune call."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    base_job_name='pca', sagemaker_session=sagemaker_session,
                    encrypt_inter_container_traffic=True)
    tuner.estimator = estimator
    record_set = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)

    tuner.fit(record_set, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['encrypt_inter_container_traffic'] is True
def test_fit_mxnet_with_vpc_config(sagemaker_session, tuner):
    """Subnets and security groups given at init propagate as vpc_config."""
    subnets = ['foo']
    security_group_ids = ['bar']
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    base_job_name='pca', sagemaker_session=sagemaker_session,
                    subnets=subnets, security_group_ids=security_group_ids)
    tuner.estimator = estimator
    record_set = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)

    tuner.fit(record_set, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    assert tune_kwargs['vpc_config'] == {'Subnets': subnets,
                                         'SecurityGroupIds': security_group_ids}
def test_fit_pca_with_early_stopping(sagemaker_session, tuner):
    """The tuner's early_stopping_type propagates into the tune call."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    base_job_name='pca', sagemaker_session=sagemaker_session)
    tuner.estimator = estimator
    tuner.early_stopping_type = 'Auto'
    record_set = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)

    tuner.fit(record_set, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['early_stopping_type'] == 'Auto'