def test_fit_ndarray(time, sagemaker_session):
    """Check the S3 objects written when fitting PCA on in-memory arrays.

    The session's boto ``resource`` is replaced with a mock so every
    ``Object(bucket, key)`` lookup and every ``put`` can be inspected.
    ``time`` is accepted but not used in the body (presumably a patched
    clock injected by a decorator outside this view -- TODO confirm).
    """
    mock_s3 = Mock()
    mock_object = Mock()
    mock_s3.Object = Mock(return_value=mock_object)
    sagemaker_session.boto_session.resource = Mock(return_value=mock_s3)

    # Copy before mutating so the shared COMMON_ARGS constant stays untouched.
    kwargs = dict(COMMON_ARGS)
    kwargs["train_instance_count"] = 3
    pca = PCA(num_components=55,
              sagemaker_session=sagemaker_session,
              data_location="s3://{}/key-prefix/".format(BUCKET_NAME),
              **kwargs)
    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0],
             [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    pca.fit(pca.record_set(np.array(train), np.array(labels)))

    # Build the expected keys from TIMESTAMP. The previous literals had no
    # "{}" placeholder, so their .format(TIMESTAMP) calls silently did nothing
    # and the timestamp was duplicated by hand in every key.
    # NOTE(review): assumes TIMESTAMP == "2017-11-06-14:14:15.671" -- confirm.
    key_prefix = "key-prefix/PCA-{}/".format(TIMESTAMP)
    expected_files = [
        "matrix_0.pbr",
        "matrix_1.pbr",
        "matrix_2.pbr",
        ".amazon.manifest",
    ]
    for file_name in expected_files:
        mock_s3.Object.assert_any_call(BUCKET_NAME, key_prefix + file_name)

    # One put per expected object: three matrix files plus the manifest.
    assert mock_object.put.call_count == len(expected_files)
Esempio n. 2
0
def test_model_image(sagemaker_session):
    """The model created after fitting must carry the registry's ``pca:1`` image."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')
    estimator.fit(train_records, MINI_BATCH_SIZE)

    actual_image = estimator.create_model().image
    expected_image = registry(REGION, 'pca') + '/pca:1'
    assert actual_image == expected_image
Esempio n. 3
0
def test_init_all_pca_hyperparameters(sagemaker_session):
    """Hyperparameters passed to the constructor are readable back as attributes."""
    estimator = PCA(sagemaker_session=sagemaker_session,
                    num_components=55,
                    algorithm_mode='randomized',
                    subtract_mean=True,
                    extra_components=33,
                    **COMMON_ARGS)

    # subtract_mean is supplied above but, as in the original test, not checked.
    expected_attributes = (
        ('num_components', 55),
        ('algorithm_mode', 'randomized'),
        ('extra_components', 33),
    )
    for attribute, expected_value in expected_attributes:
        assert getattr(estimator, attribute) == expected_value
def test_init_enable_network_isolation(sagemaker_session):
    """Constructing with ``enable_network_isolation=True`` makes the accessor return True."""
    estimator = PCA(enable_network_isolation=True,
                    num_components=55,
                    sagemaker_session=sagemaker_session,
                    **COMMON_ARGS)

    assert estimator.num_components == 55
    isolation_enabled = estimator.enable_network_isolation()
    assert isolation_enabled is True
def test_fit_pca(sagemaker_session, tuner):
    """Fit the tuner with a PCA estimator and inspect the first ``tune`` call.

    Three hyperparameters are set directly on the estimator, two ranges are
    installed on the tuner, and the keyword arguments recorded on
    ``sagemaker_session.tune`` are then checked.
    """
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    sagemaker_session=sagemaker_session, base_job_name='pca')
    for attribute, value in (('algorithm_mode', 'randomized'),
                             ('subtract_mean', True),
                             ('extra_components', 5)):
        setattr(estimator, attribute, value)
    tuner.estimator = estimator

    expected_tags = [{'Name': 'some-tag-without-a-value'}]
    tuner.tags = expected_tags

    tuner._hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized']),
    }

    train_records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(train_records, mini_batch_size=9999)

    # Each recorded mock call is a (name, args, kwargs) triple; keep the kwargs.
    first_tune_call = sagemaker_session.tune.mock_calls[0]
    tune_kwargs = first_tune_call[2]

    static_hyperparameters = tune_kwargs['static_hyperparameters']
    assert len(static_hyperparameters) == 4
    assert static_hyperparameters['extra_components'] == '5'
    integer_ranges = tune_kwargs['parameter_ranges']['IntegerParameterRanges']
    assert len(integer_ranges) == 1
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['tags'] == expected_tags
    assert tuner.estimator.mini_batch_size == 9999
Esempio n. 6
0
def test_init_required_named(sagemaker_session):
    """Required arguments passed by keyword end up on the matching attributes."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    expected_role = COMMON_TRAIN_ARGS["role"]
    assert estimator.role == expected_role

    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT

    expected_instance_type = COMMON_TRAIN_ARGS["train_instance_type"]
    assert estimator.train_instance_type == expected_instance_type

    expected_components = ALL_REQ_ARGS["num_components"]
    assert estimator.num_components == expected_components
def test_prepare_for_training_with_amazon_estimator(tuner, sagemaker_session):
    """With a PCA estimator, preparing the tuner adds no estimator class/module keys."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    sagemaker_session=sagemaker_session)
    tuner.estimator = estimator

    tuner._prepare_for_training()

    for absent_key in ('sagemaker_estimator_class_name', 'sagemaker_estimator_module'):
        assert absent_key not in tuner.static_hyperparameters
Esempio n. 8
0
def test_init_estimator_args(sagemaker_session):
    """Generic estimator arguments given to PCA are readable back as attributes."""
    data_location = 's3://some-bucket/some-key/'
    estimator = PCA(data_location=data_location,
                    train_max_run=1234,
                    num_components=1,
                    sagemaker_session=sagemaker_session,
                    **COMMON_ARGS)

    for shared_arg in ('train_instance_type', 'train_instance_count', 'role'):
        assert getattr(estimator, shared_arg) == COMMON_ARGS[shared_arg]
    assert estimator.train_max_run == 1234
    assert estimator.data_location == data_location
Esempio n. 9
0
def test_predictor_type(sagemaker_session):
    """Deploying the model created from a fitted PCA returns a ``PCAPredictor``."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')
    estimator.fit(train_records, MINI_BATCH_SIZE)

    deployed = estimator.create_model().deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(deployed, PCAPredictor)
Esempio n. 10
0
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """Fitting with a string mini batch size raises ``TypeError`` or ``ValueError``."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri,
                              num_records=1,
                              feature_dim=FEATURE_DIM,
                              channel='train')

    accepted_errors = (TypeError, ValueError)
    with pytest.raises(accepted_errors):
        estimator.fit(train_records, 'some')
Esempio n. 11
0
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Preparing without a mini batch size leaves ``mini_batch_size`` equal to 1 here."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri,
                              num_records=1,
                              feature_dim=FEATURE_DIM,
                              channel='train')

    estimator._prepare_for_training(train_records)

    assert estimator.mini_batch_size == 1
def test_data_location_does_not_call_default_bucket(sagemaker_session):
    """An explicit ``data_location`` is kept and ``default_bucket`` is never called."""
    explicit_location = "s3://my-bucket/path/"
    estimator = PCA(data_location=explicit_location,
                    num_components=2,
                    sagemaker_session=sagemaker_session,
                    **COMMON_ARGS)

    assert estimator.data_location == explicit_location
    default_bucket_called = sagemaker_session.default_bucket.called
    assert not default_bucket_called
Esempio n. 13
0
def test_all_hyperparameters(sagemaker_session):
    """``hyperparameters()`` returns every PCA hyperparameter as a string."""
    estimator = PCA(algorithm_mode='regular',
                    subtract_mean='True',
                    extra_components=1,
                    sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)

    expected = {
        'num_components': str(ALL_REQ_ARGS['num_components']),
        'algorithm_mode': 'regular',
        'subtract_mean': 'True',
        'extra_components': '1',
    }
    assert estimator.hyperparameters() == expected
Esempio n. 14
0
def test_prepare_for_training_multiple_channel(sagemaker_session):
    """Preparing with a list holding the same train RecordSet twice gives mini_batch_size 1."""
    estimator = PCA(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri,
                              num_records=1,
                              feature_dim=FEATURE_DIM,
                              channel='train')

    # The same RecordSet object is supplied twice, as two list entries.
    estimator._prepare_for_training([train_records] * 2)

    assert estimator.mini_batch_size == 1
Esempio n. 15
0
def test_pca_hyperparameters(sagemaker_session):
    """Constructor hyperparameters come back from ``hyperparameters()`` as strings."""
    estimator = PCA(sagemaker_session=sagemaker_session,
                    num_components=55,
                    algorithm_mode='randomized',
                    subtract_mean=True,
                    extra_components=33,
                    **COMMON_ARGS)

    expected = {
        'num_components': '55',
        'extra_components': '33',
        'subtract_mean': 'True',
        'algorithm_mode': 'randomized',
    }
    assert estimator.hyperparameters() == expected
Esempio n. 16
0
def test_init_required_positional(sagemaker_session):
    """Required arguments passed positionally land on the matching attributes."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    sagemaker_session=sagemaker_session)

    expected_attributes = (
        ('role', ROLE),
        ('train_instance_count', TRAIN_INSTANCE_COUNT),
        ('train_instance_type', TRAIN_INSTANCE_TYPE),
        ('num_components', NUM_COMPONENTS),
    )
    for attribute, expected_value in expected_attributes:
        assert getattr(estimator, attribute) == expected_value
def test_prepare_for_training_list(sagemaker_session):
    """Preparing with a one-element list of record sets sets feature_dim and mini_batch_size."""
    estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    # Four rows of three features each, with one label per row.
    features = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]])
    targets = np.array([99, 85, 87, 2])
    record_sets = [estimator.record_set(features, targets)]

    estimator._prepare_for_training(record_sets, mini_batch_size=1)

    assert estimator.feature_dim == 3
    assert estimator.mini_batch_size == 1
Esempio n. 18
0
def test_prepare_for_training_multiple_channel_no_train(sagemaker_session):
    """Preparing with record sets that are all on a non-train channel raises ``ValueError``.

    Both list entries are the same RecordSet on channel ``'mock'``, so no
    entry uses the ``'train'`` channel.
    """
    pca = PCA(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM,
                     channel='mock')

    with pytest.raises(ValueError) as ex:
        pca._prepare_for_training([data, data])

    # Inspect the raised exception itself: str() of the ExceptionInfo wrapper
    # is pytest's own rendering, whose format varies between pytest versions.
    assert 'Must provide train channel.' in str(ex.value)
Esempio n. 19
0
def test_call_fit_none_mini_batch_size(sagemaker_session):
    """Calling ``fit`` without a mini batch size must complete without raising.

    There is no explicit assertion; the test passes if ``fit`` returns.
    """
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    estimator.fit(train_records)
def test_validate_parameter_ranges_string_value_validation_error(sagemaker_session):
    """Building a tuner with non-string ``algorithm_mode`` categories raises ``ValueError``.

    The categorical range holds the integers 0 and 5, and the test expects
    the error message to name the two accepted values.
    """
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)

    invalid_hyperparameter_ranges = {'algorithm_mode': CategoricalParameter([0, 5])}

    with pytest.raises(ValueError) as e:
        HyperparameterTuner(estimator=pca, objective_metric_name=OBJECTIVE_METRIC_NAME,
                            hyperparameter_ranges=invalid_hyperparameter_ranges, metric_definitions=METRIC_DEFINTIONS)

    # Inspect the raised exception itself: str() of the ExceptionInfo wrapper
    # is pytest's own rendering, whose format varies between pytest versions.
    assert 'Value must be one of "regular" and "randomized"' in str(e.value)
def test_init_estimator_args(sagemaker_session):
    """Generic estimator arguments given to PCA are readable back as attributes."""
    data_location = "s3://some-bucket/some-key/"
    estimator = PCA(data_location=data_location,
                    max_run=1234,
                    num_components=1,
                    sagemaker_session=sagemaker_session,
                    **COMMON_ARGS)

    for shared_arg in ("instance_type", "instance_count", "role"):
        assert getattr(estimator, shared_arg) == COMMON_ARGS[shared_arg]
    assert estimator.max_run == 1234
    assert estimator.data_location == data_location
def test_validate_parameter_ranges_number_validation_error(sagemaker_session):
    """Building a tuner with a ``num_components`` range starting at -1 raises ``ValueError``.

    The test expects the error message to state that the value must be an
    integer greater than zero.
    """
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)

    invalid_hyperparameter_ranges = {'num_components': IntegerParameter(-1, 2)}

    with pytest.raises(ValueError) as e:
        HyperparameterTuner(estimator=pca, objective_metric_name=OBJECTIVE_METRIC_NAME,
                            hyperparameter_ranges=invalid_hyperparameter_ranges, metric_definitions=METRIC_DEFINTIONS)

    # Inspect the raised exception itself: str() of the ExceptionInfo wrapper
    # is pytest's own rendering, whose format varies between pytest versions.
    assert 'Value must be an integer greater than zero' in str(e.value)
Esempio n. 23
0
def test_call_fit(base_fit, sagemaker_session):
    """``PCA.fit`` calls ``base_fit`` exactly once with (data, mini batch size) positionally."""
    estimator = PCA(base_job_name='pca', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    estimator.fit(train_records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    # Index 0 of call_args holds the positional arguments of the recorded call.
    positional_args = base_fit.call_args[0]
    assert len(positional_args) == 2
    assert positional_args[0] == train_records
    assert positional_args[1] == MINI_BATCH_SIZE
def test_prepare_for_training_list_no_train_channel(sagemaker_session):
    """Preparing with a list whose only record set is on channel ``"test"`` raises ``ValueError``."""
    pca = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)

    train = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 8.0], [44.0, 55.0, 66.0]]
    labels = [99, 85, 87, 2]
    # The third argument puts the record set on the "test" channel, not "train".
    records = [pca.record_set(np.array(train), np.array(labels), "test")]

    with pytest.raises(ValueError) as ex:
        pca._prepare_for_training(records, mini_batch_size=1)

    # Inspect the raised exception itself: str() of the ExceptionInfo wrapper
    # is pytest's own rendering, whose format varies between pytest versions.
    assert "Must provide train channel." in str(ex.value)
Esempio n. 25
0
def test_all_hyperparameters(sagemaker_session):
    """``hyperparameters()`` returns every PCA hyperparameter as a string."""
    estimator = PCA(algorithm_mode="regular",
                    subtract_mean="True",
                    extra_components=1,
                    sagemaker_session=sagemaker_session,
                    **ALL_REQ_ARGS)

    expected = {
        "num_components": str(ALL_REQ_ARGS["num_components"]),
        "algorithm_mode": "regular",
        "subtract_mean": "True",
        "extra_components": "1",
    }
    assert estimator.hyperparameters() == expected
def test_model_image(sagemaker_session):
    """The model's ``image_uri`` equals what ``image_uris.retrieve`` gives for "pca" in REGION."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(train_records, MINI_BATCH_SIZE)

    created_model = estimator.create_model()
    expected_uri = image_uris.retrieve("pca", REGION)
    assert expected_uri == created_model.image_uri
Esempio n. 27
0
def test_image(sagemaker_session):
    """``train_image()`` gives the expected registry path for PCA and ImageClassification."""
    pca_estimator = PCA(num_components=55, sagemaker_session=sagemaker_session, **COMMON_ARGS)
    expected_pca_image = registry('us-west-2') + '/pca:1'
    assert pca_estimator.train_image() == expected_pca_image

    classifier = ImageClassification(sagemaker_session=sagemaker_session,
                                     num_classes=257,
                                     num_training_samples=15420,
                                     epochs=1,
                                     image_shape='3,32,32',
                                     **COMMON_ARGS)
    classifier_registry = registry('us-west-2', 'image_classification')
    expected_classifier_image = classifier_registry + '/image-classification:latest'
    assert classifier.train_image() == expected_classifier_image
def test_fit_pca_with_inter_container_traffic_encryption_flag(sagemaker_session, tuner):
    """The estimator's inter-container encryption flag appears in the first ``tune`` call."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    encrypt_inter_container_traffic=True,
                    base_job_name='pca',
                    sagemaker_session=sagemaker_session)
    tuner.estimator = estimator

    train_records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(train_records, mini_batch_size=9999)

    # Each recorded mock call is a (name, args, kwargs) triple; keep the kwargs.
    first_tune_call = sagemaker_session.tune.mock_calls[0]
    tune_kwargs = first_tune_call[2]

    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['encrypt_inter_container_traffic'] is True
def test_fit_mxnet_with_vpc_config(sagemaker_session, tuner):
    """Subnets and security groups on the estimator become ``vpc_config`` in the ``tune`` call.

    Despite "mxnet" in the test name, the estimator built here is PCA.
    """
    expected_subnets = ['foo']
    expected_security_groups = ['bar']

    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    subnets=expected_subnets,
                    security_group_ids=expected_security_groups,
                    base_job_name='pca',
                    sagemaker_session=sagemaker_session)
    tuner.estimator = estimator

    train_records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(train_records, mini_batch_size=9999)

    # Each recorded mock call is a (name, args, kwargs) triple; keep the kwargs.
    first_tune_call = sagemaker_session.tune.mock_calls[0]
    tune_kwargs = first_tune_call[2]

    expected_vpc_config = {
        'Subnets': expected_subnets,
        'SecurityGroupIds': expected_security_groups,
    }
    assert tune_kwargs['vpc_config'] == expected_vpc_config
def test_fit_pca_with_early_stopping(sagemaker_session, tuner):
    """The tuner's ``early_stopping_type`` of 'Auto' appears in the first ``tune`` call."""
    estimator = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
                    sagemaker_session=sagemaker_session, base_job_name='pca')
    tuner.estimator = estimator
    tuner.early_stopping_type = 'Auto'

    train_records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(train_records, mini_batch_size=9999)

    # Each recorded mock call is a (name, args, kwargs) triple; keep the kwargs.
    first_tune_call = sagemaker_session.tune.mock_calls[0]
    tune_kwargs = first_tune_call[2]

    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['early_stopping_type'] == 'Auto'