def test_fit_pca(sagemaker_session, tuner):
    """Fit a tuner wrapping a PCA estimator and inspect the kwargs sent to ``tune``.

    The PCA estimator is configured with extra attributes, attached to the
    ``tuner`` fixture together with tags and hyperparameter ranges, and then
    ``tuner.fit`` is called. The first recorded ``sagemaker_session.tune``
    call is checked for static hyperparameters, parameter ranges, job name
    prefix and tags; the estimator's ``mini_batch_size`` is checked last.
    """
    estimator = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        base_job_name='pca',
        sagemaker_session=sagemaker_session,
    )
    estimator.algorithm_mode = 'randomized'
    estimator.subtract_mean = True
    estimator.extra_components = 5
    tuner.estimator = estimator

    expected_tags = [{'Name': 'some-tag-without-a-value'}]
    tuner.tags = expected_tags

    tuner._hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized']),
    }

    training_records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(training_records, mini_batch_size=9999)

    # Each recorded mock call unpacks as (name, positional args, keyword args).
    _name, _args, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    static_hyperparameters = tune_kwargs['static_hyperparameters']

    assert len(static_hyperparameters) == 4
    assert static_hyperparameters['extra_components'] == '5'
    assert len(tune_kwargs['parameter_ranges']['IntegerParameterRanges']) == 1
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['tags'] == expected_tags
    assert tuner.estimator.mini_batch_size == 9999
def test_prepare_for_training_list(sagemaker_session):
    """Check ``_prepare_for_training`` accepts a list holding one record set.

    After preparation the estimator must report ``feature_dim == 3`` (the
    width of each training row) and the ``mini_batch_size`` that was passed.
    """
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    features = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 8.0],
        [44.0, 55.0, 66.0],
    ]
    targets = [99, 85, 87, 2]

    record_sets = [estimator.record_set(np.array(features), np.array(targets))]
    estimator._prepare_for_training(record_sets, mini_batch_size=1)

    assert estimator.feature_dim == 3
    assert estimator.mini_batch_size == 1
def test_prepare_for_training_list_no_train_channel(sagemaker_session):
    """Check ``_prepare_for_training`` rejects a record list with no train channel.

    The only record set is created with channel ``'test'``, so the call is
    expected to raise ``ValueError`` mentioning the missing train channel.
    """
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    records = [pca.record_set(np.array(train), np.array(labels), 'test')]

    with pytest.raises(ValueError) as ex:
        pca._prepare_for_training(records, mini_batch_size=1)

    # Inspect the raised exception itself: ``str(ex)`` formats the pytest
    # ExceptionInfo wrapper, whose representation differs between pytest
    # versions, whereas ``ex.value`` is the actual ValueError instance.
    assert 'Must provide train channel.' in str(ex.value)
def test_fit_ndarray(time, sagemaker_session):
    """Fit PCA on an in-memory array and check the objects uploaded to S3.

    The boto S3 resource on the session is replaced by a mock so that every
    ``Object(bucket, key)`` lookup and ``put`` call can be inspected. With
    ``train_instance_count`` set to 3 the test expects three matrix files and
    one manifest under the configured ``data_location`` prefix.
    """
    mock_s3 = Mock()
    mock_object = Mock()
    mock_s3.Object = Mock(return_value=mock_object)
    sagemaker_session.boto_session.resource = Mock(return_value=mock_s3)

    kwargs = dict(COMMON_ARGS)
    kwargs['train_instance_count'] = 3
    pca = PCA(
        num_components=55,
        sagemaker_session=sagemaker_session,
        data_location='s3://{}/key-prefix/'.format(BUCKET_NAME),
        **kwargs
    )

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    pca.fit(pca.record_set(np.array(train), np.array(labels)))

    # The key template previously had no replacement field, so
    # ``.format(TIMESTAMP)`` was a no-op and the timestamp was hard-coded.
    # NOTE(review): assumes TIMESTAMP == '2017-11-06-14:14:15.671' (the value
    # that used to be spelled out here) -- confirm against its definition.
    expected_files = ('matrix_0.pbr', 'matrix_1.pbr', 'matrix_2.pbr', '.amazon.manifest')
    for file_name in expected_files:
        mock_s3.Object.assert_any_call(
            BUCKET_NAME, 'key-prefix/PCA-{}/{}'.format(TIMESTAMP, file_name))

    assert mock_object.put.call_count == len(expected_files)
def test_init(sagemaker_session):
    """Construct PCA with keyword arguments and check its initial state.

    Verifies the stored ``num_components`` and that
    ``enable_network_isolation()`` returns ``False``.
    """
    estimator = PCA(
        num_components=55,
        sagemaker_session=sagemaker_session,
        **COMMON_ARGS
    )

    assert estimator.num_components == 55
    assert estimator.enable_network_isolation() is False
def test_data_location_validation(sagemaker_session):
    """Assigning a ``data_location`` that is not an ``s3://`` URI must raise ``ValueError``."""
    estimator = PCA(
        num_components=2,
        sagemaker_session=sagemaker_session,
        **COMMON_ARGS
    )

    with pytest.raises(ValueError):
        estimator.data_location = "nots3://abcd/efgh"
def test_init(sagemaker_session):
    """Construct PCA with keyword arguments and check ``num_components`` is stored."""
    estimator = PCA(
        num_components=55,
        sagemaker_session=sagemaker_session,
        **COMMON_ARGS
    )

    assert estimator.num_components == 55
ESTIMATOR_NAME_TWO = "estimator_name_two" SAGEMAKER_SESSION = Mock() ESTIMATOR = Estimator( IMAGE_NAME, ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, output_path="s3://bucket/prefix", sagemaker_session=SAGEMAKER_SESSION, ) ESTIMATOR_TWO = PCA( ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS, sagemaker_session=SAGEMAKER_SESSION, ) WARM_START_CONFIG = WarmStartConfig( warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, parents={"p1", "p2", "p3"} ) TUNING_JOB_DETAILS = { "HyperParameterTuningJobConfig": { "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1}, "HyperParameterTuningJobObjective": { "MetricName": OBJECTIVE_METRIC_NAME, "Type": "Minimize", },
def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    """Constructing PCA with an invalid value for a required hyperparameter must raise.

    Args:
        sagemaker_session: Session fixture passed through to the estimator.
        required_hyper_parameters: Name of the required argument to override.
        value: Invalid value substituted for that argument.
    """
    # Build the arguments outside the ``raises`` context so that only the
    # constructor call itself can satisfy the expected ValueError.
    test_params = ALL_REQ_ARGS.copy()
    test_params[required_hyper_parameters] = value

    with pytest.raises(ValueError):
        PCA(sagemaker_session=sagemaker_session, **test_params)
def test_image(sagemaker_session):
    """Check ``train_image()`` equals the ``pca`` registry for ``REGION`` plus ``/pca:1``."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    actual_image = estimator.train_image()
    expected_image = registry(REGION, 'pca') + '/pca:1'

    assert actual_image == expected_image
def test_init_required_positional(sagemaker_session):
    """Construct PCA with its required arguments given positionally.

    Each positional argument must end up on the matching estimator attribute.
    """
    estimator = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        sagemaker_session=sagemaker_session,
    )

    assert estimator.role == ROLE
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == TRAIN_INSTANCE_TYPE
    assert estimator.num_components == NUM_COMPONENTS
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    """Constructing PCA with an invalid value for an optional hyperparameter must raise.

    Args:
        sagemaker_session: Session fixture passed through to the estimator.
        optional_hyper_parameters: Name of the optional argument to add.
        value: Invalid value supplied for that argument.
    """
    # Build the arguments outside the ``raises`` context so that only the
    # constructor call itself can satisfy the expected ValueError.
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({optional_hyper_parameters: value})

    with pytest.raises(ValueError):
        PCA(sagemaker_session=sagemaker_session, **test_params)
def test_image(sagemaker_session):
    """Check ``training_image_uri()`` matches ``image_uris.retrieve`` for ``"pca"`` in ``REGION``."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    expected_uri = image_uris.retrieve("pca", REGION)
    actual_uri = estimator.training_image_uri()

    assert expected_uri == actual_uri
def test_image(sagemaker_session):
    """Check ``train_image()`` equals the ``us-west-2`` registry plus ``/pca:1``."""
    estimator = PCA(
        num_components=55,
        sagemaker_session=sagemaker_session,
        **COMMON_ARGS
    )

    actual_image = estimator.train_image()
    expected_image = registry('us-west-2') + '/pca:1'

    assert actual_image == expected_image
def test_data_location_does_not_call_default_bucket(sagemaker_session):
    """An explicit ``data_location`` is kept as given without calling ``default_bucket``."""
    explicit_location = "s3://my-bucket/path/"

    estimator = PCA(
        num_components=2,
        sagemaker_session=sagemaker_session,
        data_location=explicit_location,
        **COMMON_ARGS
    )

    assert estimator.data_location == explicit_location
    assert not sagemaker_session.default_bucket.called