def test_fit_pca(sagemaker_session, tuner):
    """Fit a tuner that wraps a PCA estimator and inspect the ``tune`` call.

    The keyword arguments of the first call recorded on
    ``sagemaker_session.tune`` are checked for the static hyperparameters,
    the integer parameter ranges, the job-name prefix and the tags. The
    ``mini_batch_size`` passed to ``fit`` must end up on the estimator.
    """
    estimator = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        base_job_name='pca',
        sagemaker_session=sagemaker_session,
    )
    estimator.algorithm_mode = 'randomized'
    estimator.subtract_mean = True
    estimator.extra_components = 5

    expected_tags = [{'Name': 'some-tag-without-a-value'}]

    # Configure the tuner fixture with the estimator, tags and search space.
    tuner.estimator = estimator
    tuner.tags = expected_tags
    tuner._hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized']),
    }

    training_data = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(training_data, mini_batch_size=9999)

    # Each recorded mock call unpacks into (name, args, kwargs).
    _name, _args, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    static_params = tune_kwargs['static_hyperparameters']
    integer_ranges = tune_kwargs['parameter_ranges']['IntegerParameterRanges']

    assert len(static_params) == 4
    assert static_params['extra_components'] == '5'
    assert len(integer_ranges) == 1
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['tags'] == expected_tags
    assert tuner.estimator.mini_batch_size == 9999
def test_prepare_for_training_list(sagemaker_session):
    """Prepare training from a one-element list of record sets.

    After ``_prepare_for_training`` the estimator must report a
    ``feature_dim`` of 3 (the rows below have three values each) and the
    ``mini_batch_size`` that was passed in.
    """
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    features = np.array([
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 8.0],
        [44.0, 55.0, 66.0],
    ])
    targets = np.array([99, 85, 87, 2])
    record_sets = [estimator.record_set(features, targets)]

    estimator._prepare_for_training(record_sets, mini_batch_size=1)

    assert estimator.feature_dim == 3
    assert estimator.mini_batch_size == 1
def test_prepare_for_training_list_no_train_channel(sagemaker_session):
    """Preparing training without a train channel must raise ``ValueError``.

    The only record set supplied is created with ``'test'`` as the third
    argument to ``record_set``, so no train channel is present in the list.
    """
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    records = [pca.record_set(np.array(train), np.array(labels), 'test')]

    with pytest.raises(ValueError) as ex:
        pca._prepare_for_training(records, mini_batch_size=1)

    # Inspect the raised exception itself: ``str(ex)`` stringifies pytest's
    # ExceptionInfo wrapper, whose format differs between pytest versions.
    assert 'Must provide train channel.' in str(ex.value)
def test_fit_ndarray(time, sagemaker_session):
    """Fit from in-memory arrays and verify the objects written to S3.

    With ``train_instance_count`` set to 3, the test expects three matrix
    shards plus one manifest under the configured ``data_location`` prefix,
    i.e. four ``put`` calls in total.

    Args:
        time: not used in the body; presumably a patched ``time`` object
            injected by a decorator outside this block that pins the job
            timestamp -- TODO confirm.
        sagemaker_session: session whose ``boto_session.resource`` is
            replaced by a mock returning the mocked S3 resource.
    """
    mock_s3 = Mock()
    mock_object = Mock()
    mock_s3.Object = Mock(return_value=mock_object)
    sagemaker_session.boto_session.resource = Mock(return_value=mock_s3)

    kwargs = dict(COMMON_ARGS)
    kwargs['train_instance_count'] = 3
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session,
              data_location='s3://{}/key-prefix/'.format(BUCKET_NAME), **kwargs)

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    pca.fit(pca.record_set(np.array(train), np.array(labels)))

    # The expected keys embed a literal timestamp. The former
    # ``.format(TIMESTAMP)`` calls had no placeholder to fill and were no-ops,
    # so they are dropped here.
    # NOTE(review): if a module-level TIMESTAMP constant holds this same
    # value, consider interpolating it instead of repeating the literal.
    key_prefix = 'key-prefix/PCA-2017-11-06-14:14:15.671/'
    expected_names = ('matrix_0.pbr', 'matrix_1.pbr', 'matrix_2.pbr', '.amazon.manifest')
    for object_name in expected_names:
        mock_s3.Object.assert_any_call(BUCKET_NAME, key_prefix + object_name)

    assert mock_object.put.call_count == 4
def test_init(sagemaker_session):
    """Construct PCA with keyword arguments and check its initial state.

    ``num_components`` must be stored as given and
    ``enable_network_isolation()`` must return ``False``.
    """
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    assert estimator.num_components == 55
    assert estimator.enable_network_isolation() is False
def test_data_location_validation(sagemaker_session):
    """Assigning a non-``s3://`` URI to ``data_location`` must raise ``ValueError``."""
    estimator = PCA(num_components=2, sagemaker_session=sagemaker_session, **COMMON_ARGS)
    invalid_uri = "nots3://abcd/efgh"

    with pytest.raises(ValueError):
        estimator.data_location = invalid_uri
# Example #7
# 0
def test_init(sagemaker_session):
    """Construct PCA with keyword arguments and check ``num_components`` is stored."""
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    assert estimator.num_components == 55
# Name string for a second estimator; its consumers are outside this excerpt.
ESTIMATOR_NAME_TWO = "estimator_name_two"

# One mock session shared by both module-level estimators below.
SAGEMAKER_SESSION = Mock()

# Generic estimator built from IMAGE_NAME with an explicit S3 output path.
ESTIMATOR = Estimator(
    IMAGE_NAME,
    ROLE,
    TRAIN_INSTANCE_COUNT,
    TRAIN_INSTANCE_TYPE,
    output_path="s3://bucket/prefix",
    sagemaker_session=SAGEMAKER_SESSION,
)
# PCA estimator built with the same role, instance settings and mock session.
ESTIMATOR_TWO = PCA(
    ROLE,
    TRAIN_INSTANCE_COUNT,
    TRAIN_INSTANCE_TYPE,
    NUM_COMPONENTS,
    sagemaker_session=SAGEMAKER_SESSION,
)

# Warm-start configuration of type IDENTICAL_DATA_AND_ALGORITHM with three
# parent names, given as a set.
WARM_START_CONFIG = WarmStartConfig(
    warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, parents={"p1", "p2", "p3"}
)

TUNING_JOB_DETAILS = {
    "HyperParameterTuningJobConfig": {
        "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1},
        "HyperParameterTuningJobObjective": {
            "MetricName": OBJECTIVE_METRIC_NAME,
            "Type": "Minimize",
        },
# Example #9
# 0
def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    """Constructing PCA with an invalid required hyperparameter must raise ``ValueError``.

    Args:
        sagemaker_session: session passed through to the estimator.
        required_hyper_parameters: name of the argument to override in a copy
            of ``ALL_REQ_ARGS``; presumably supplied by a parametrize
            decorator outside this block -- TODO confirm.
        value: the value assigned to that argument.
    """
    test_params = ALL_REQ_ARGS.copy()
    test_params[required_hyper_parameters] = value

    # Only the constructor call sits inside ``raises`` so that a ValueError
    # from the setup above cannot make the test pass by accident.
    with pytest.raises(ValueError):
        PCA(sagemaker_session=sagemaker_session, **test_params)
# Example #10
# 0
def test_image(sagemaker_session):
    """``train_image()`` must equal the ``pca`` registry for REGION plus ``/pca:1``."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    actual_image = estimator.train_image()
    expected_image = registry(REGION, 'pca') + '/pca:1'

    assert actual_image == expected_image
# Example #11
# 0
def test_init_required_positional(sagemaker_session):
    """Construct PCA with positional arguments and check each is stored on its attribute."""
    estimator = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        sagemaker_session=sagemaker_session,
    )

    # Attribute name paired with the positional value it should hold, in
    # the order the arguments were passed.
    expectations = (
        ('role', ROLE),
        ('train_instance_count', TRAIN_INSTANCE_COUNT),
        ('train_instance_type', TRAIN_INSTANCE_TYPE),
        ('num_components', NUM_COMPONENTS),
    )
    for attribute, wanted in expectations:
        assert getattr(estimator, attribute) == wanted
# Example #12
# 0
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    """Constructing PCA with an invalid optional hyperparameter must raise ``ValueError``.

    Args:
        sagemaker_session: session passed through to the estimator.
        optional_hyper_parameters: name of the argument to add to a copy of
            ``ALL_REQ_ARGS``; presumably supplied by a parametrize decorator
            outside this block -- TODO confirm.
        value: the value assigned to that argument.
    """
    test_params = ALL_REQ_ARGS.copy()
    test_params[optional_hyper_parameters] = value

    # Only the constructor call sits inside ``raises`` so that a ValueError
    # from the setup above cannot make the test pass by accident.
    with pytest.raises(ValueError):
        PCA(sagemaker_session=sagemaker_session, **test_params)
def test_data_location_validation(sagemaker_session):
    """Assigning a non-``s3://`` URI to ``data_location`` must raise ``ValueError``."""
    estimator = PCA(
        num_components=2,
        sagemaker_session=sagemaker_session,
        **COMMON_ARGS
    )
    invalid_uri = "nots3://abcd/efgh"

    with pytest.raises(ValueError):
        estimator.data_location = invalid_uri
def test_image(sagemaker_session):
    """``training_image_uri()`` must match ``image_uris.retrieve("pca", REGION)``."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    expected_uri = image_uris.retrieve("pca", REGION)
    actual_uri = estimator.training_image_uri()

    assert expected_uri == actual_uri
def test_image(sagemaker_session):
    """``train_image()`` must equal the ``us-west-2`` registry plus ``/pca:1``."""
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    actual_image = estimator.train_image()
    expected_image = registry('us-west-2') + '/pca:1'

    assert actual_image == expected_image
# Example #16
# 0
def test_data_location_does_not_call_default_bucket(sagemaker_session):
    """An explicit ``data_location`` is kept and ``default_bucket`` is never called."""
    explicit_location = "s3://my-bucket/path/"
    estimator = PCA(
        num_components=2,
        sagemaker_session=sagemaker_session,
        data_location=explicit_location,
        **COMMON_ARGS
    )

    assert estimator.data_location == explicit_location
    assert not sagemaker_session.default_bucket.called