예제 #1
0
def test_model_image(sagemaker_session):
    """The model created after fit() must report the registry path plus '/kmeans:1' as its image."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    train_records = RecordSet(
        s3_uri,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    estimator.fit(train_records, MINI_BATCH_SIZE)

    fitted_model = estimator.create_model()
    actual_image = fitted_model.image
    assert actual_image == registry(REGION, 'kmeans') + '/kmeans:1'
예제 #2
0
def test_all_hyperparameters(sagemaker_session):
    """hyperparameters() must return every configured value as a string under its wire name.

    The expected mapping also contains ``force_dense='True'``, which is not
    passed to the constructor here.
    """
    estimator = KMeans(
        sagemaker_session=sagemaker_session,
        init_method='random',
        max_iterations=3,
        tol=0.5,
        num_trials=5,
        local_init_method='kmeans++',
        half_life_time_size=0,
        epochs=10,
        center_factor=2,
        eval_metrics=['msd', 'ssd'],
        **ALL_REQ_ARGS
    )

    actual = estimator.hyperparameters()
    expected = {
        'k': str(ALL_REQ_ARGS['k']),
        'init_method': 'random',
        'local_lloyd_max_iter': '3',
        'local_lloyd_tol': '0.5',
        'local_lloyd_num_trials': '5',
        'local_lloyd_init_method': 'kmeans++',
        'half_life_time_size': '0',
        'epochs': '10',
        'extra_center_factor': '2',
        'eval_metrics': '[\'msd\', \'ssd\']',
        'force_dense': 'True',
    }
    assert actual == expected
예제 #3
0
def test_model_image(sagemaker_session):
    """Fit a KMeans estimator and verify the image recorded on the model it creates."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    record_kwargs = dict(num_records=1, feature_dim=FEATURE_DIM, channel='train')
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), **record_kwargs)
    estimator.fit(records, MINI_BATCH_SIZE)

    # The expected image is the 'kmeans' registry path with the '/kmeans:1' suffix.
    created = estimator.create_model()
    assert created.image == registry(REGION, 'kmeans') + '/kmeans:1'
예제 #4
0
def test_prepare_for_training_wrong_value_mini_batch_size(sagemaker_session):
    """_prepare_for_training must raise ValueError when given a mini batch size of 0."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    invalid_mini_batch_size = 0
    with pytest.raises(ValueError):
        estimator._prepare_for_training(records, invalid_mini_batch_size)
예제 #5
0
def test_call_fit_wrong_value_mini_batch_size(sagemaker_session):
    """fit() must raise ValueError when given a mini batch size of 0."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    invalid_mini_batch_size = 0
    with pytest.raises(ValueError):
        estimator.fit(records, invalid_mini_batch_size)
예제 #6
0
def test_all_hyperparameters(sagemaker_session):
    """Configure every optional KMeans argument and compare hyperparameters() to the expected dict.

    All expected values are strings; ``force_dense="True"`` is expected even
    though it is not passed to the constructor in this test.
    """
    estimator = KMeans(
        sagemaker_session=sagemaker_session,
        init_method="random",
        max_iterations=3,
        tol=0.5,
        num_trials=5,
        local_init_method="kmeans++",
        half_life_time_size=0,
        epochs=10,
        center_factor=2,
        eval_metrics=["msd", "ssd"],
        **ALL_REQ_ARGS
    )

    actual = estimator.hyperparameters()
    expected = {
        "k": str(ALL_REQ_ARGS["k"]),
        "init_method": "random",
        "local_lloyd_max_iter": "3",
        "local_lloyd_tol": "0.5",
        "local_lloyd_num_trials": "5",
        "local_lloyd_init_method": "kmeans++",
        "half_life_time_size": "0",
        "epochs": "10",
        "extra_center_factor": "2",
        "eval_metrics": '["msd", "ssd"]',
        "force_dense": "True",
    }
    assert actual == expected
예제 #7
0
def test_predictor_type(sagemaker_session):
    """Deploying the model created from a fitted KMeans estimator must yield a KMeansPredictor."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(
        s3_uri,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    # Deploy a single instance of the created model.
    endpoint = estimator.create_model().deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(endpoint, KMeansPredictor)
예제 #8
0
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Calling _prepare_for_training without a mini batch size must leave mini_batch_size at 5000."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    # No mini batch size is passed on purpose.
    estimator._prepare_for_training(records)

    expected_mini_batch_size = 5000
    assert estimator.mini_batch_size == expected_mini_batch_size
예제 #9
0
def test_predictor_type(sagemaker_session):
    """Fit, create a model, deploy it, and check the returned predictor is a KMeansPredictor."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    record_kwargs = dict(num_records=1, feature_dim=FEATURE_DIM, channel='train')
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), **record_kwargs)
    estimator.fit(records, MINI_BATCH_SIZE)

    fitted_model = estimator.create_model()
    instance_count = 1
    endpoint = fitted_model.deploy(instance_count, TRAIN_INSTANCE_TYPE)

    assert isinstance(endpoint, KMeansPredictor)
예제 #10
0
def test_call_fit(base_fit, sagemaker_session):
    """fit() must call base_fit exactly once with (data, MINI_BATCH_SIZE) as positional arguments."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    estimator.fit(records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    # call_args[0] holds the positional arguments of the recorded call.
    positional_args = base_fit.call_args[0]
    assert len(positional_args) == 2
    assert positional_args[0] == records
    assert positional_args[1] == MINI_BATCH_SIZE
예제 #11
0
def test_call_fit(base_fit, sagemaker_session):
    """Check that KMeans.fit forwards the record set and mini batch size to base_fit."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    record_kwargs = dict(num_records=1, feature_dim=FEATURE_DIM, channel='train')
    records = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), **record_kwargs)

    estimator.fit(records, MINI_BATCH_SIZE)

    # Exactly one call, carrying exactly two positional arguments.
    base_fit.assert_called_once()
    forwarded = base_fit.call_args[0]
    assert len(forwarded) == 2
    assert forwarded[0] == records
    assert forwarded[1] == MINI_BATCH_SIZE
예제 #12
0
def test_prepare_for_training_wrong_value_mini_batch_size(sagemaker_session):
    """A mini batch size of 0 must make _prepare_for_training raise ValueError."""
    estimator = KMeans(base_job_name="kmeans", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")

    invalid_mini_batch_size = 0
    with pytest.raises(ValueError):
        estimator._prepare_for_training(records, invalid_mini_batch_size)
예제 #13
0
def test_model_image(sagemaker_session):
    """The created model's image_uri must match image_uris.retrieve("kmeans", REGION)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)

    fitted_model = estimator.create_model()
    expected_uri = image_uris.retrieve("kmeans", REGION)
    assert expected_uri == fitted_model.image_uri
예제 #14
0
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """A string mini batch size must make _prepare_for_training raise TypeError or ValueError."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = 's3://{}/{}'.format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel='train')

    # Either exception type is accepted by this test.
    accepted_errors = (TypeError, ValueError)
    with pytest.raises(accepted_errors):
        estimator._prepare_for_training(records, 'some')
예제 #15
0
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Without an explicit mini batch size, _prepare_for_training must leave it at 5000."""
    estimator = KMeans(base_job_name="kmeans", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    s3_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")

    # The mini batch size argument is deliberately omitted.
    estimator._prepare_for_training(records)

    expected_mini_batch_size = 5000
    assert estimator.mini_batch_size == expected_mini_batch_size
예제 #16
0
def test_init_required_named(sagemaker_session):
    """Required arguments passed by keyword must be readable back as estimator attributes."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    actual_role = estimator.role
    assert actual_role == COMMON_TRAIN_ARGS['role']

    actual_count = estimator.train_instance_count
    assert actual_count == TRAIN_INSTANCE_COUNT

    actual_type = estimator.train_instance_type
    assert actual_type == COMMON_TRAIN_ARGS['train_instance_type']

    actual_k = estimator.k
    assert actual_k == ALL_REQ_ARGS['k']
예제 #17
0
def test_init_required_named(sagemaker_session):
    """Build KMeans from keyword arguments and check role, instance_count, instance_type and k."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    actual_role = estimator.role
    assert actual_role == COMMON_TRAIN_ARGS["role"]

    actual_count = estimator.instance_count
    assert actual_count == INSTANCE_COUNT

    actual_type = estimator.instance_type
    assert actual_type == COMMON_TRAIN_ARGS["instance_type"]

    actual_k = estimator.k
    assert actual_k == ALL_REQ_ARGS["k"]
예제 #18
0
def test_init_required_positional(sagemaker_session):
    """Required arguments passed positionally must be readable back as estimator attributes."""
    # Positional order used by this test: role, instance count, instance type, k.
    required_positionals = (ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, K)
    estimator = KMeans(*required_positionals, sagemaker_session=sagemaker_session)

    assert estimator.role == ROLE
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == TRAIN_INSTANCE_TYPE
    assert estimator.k == K
예제 #19
0
def test_all_hyperparameters(sagemaker_session):
    """Set every optional KMeans argument and verify the stringified hyperparameters() output.

    ``force_dense='True'`` is part of the expected mapping although it is not
    passed to the constructor in this test.
    """
    estimator = KMeans(
        sagemaker_session=sagemaker_session,
        init_method='random',
        max_iterations=3,
        tol=0.5,
        num_trials=5,
        local_init_method='kmeans++',
        half_life_time_size=0,
        epochs=10,
        center_factor=2,
        eval_metrics=['msd', 'ssd'],
        **ALL_REQ_ARGS
    )

    actual = estimator.hyperparameters()
    expected = {
        'k': str(ALL_REQ_ARGS['k']),
        'init_method': 'random',
        'local_lloyd_max_iter': '3',
        'local_lloyd_tol': '0.5',
        'local_lloyd_num_trials': '5',
        'local_lloyd_init_method': 'kmeans++',
        'half_life_time_size': '0',
        'epochs': '10',
        'extra_center_factor': '2',
        'eval_metrics': '[\'msd\', \'ssd\']',
        'force_dense': 'True',
    }
    assert actual == expected
예제 #20
0
def test_predictor_custom_serialization(sagemaker_session):
    """deploy() must return a KMeansPredictor carrying the serializer and deserializer passed in."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)
    fitted_model = estimator.create_model()

    serializer_stub = Mock()
    deserializer_stub = Mock()
    endpoint = fitted_model.deploy(
        1, INSTANCE_TYPE, serializer=serializer_stub, deserializer=deserializer_stub
    )

    assert isinstance(endpoint, KMeansPredictor)
    # Identity, not equality: the very same objects must be attached.
    assert endpoint.serializer is serializer_stub
    assert endpoint.deserializer is deserializer_stub
예제 #21
0
def test_optional_hyper_parameters_value(sagemaker_session,
                                         optional_hyper_parameters, value):
    """Constructing KMeans with the given keyword set to ``value`` must raise ValueError.

    ``optional_hyper_parameters`` is used as the keyword name added to a copy
    of ALL_REQ_ARGS; it and ``value`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    # Build the kwargs outside pytest.raises so that only the constructor
    # call itself can satisfy the expected exception.
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({optional_hyper_parameters: value})

    with pytest.raises(ValueError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
예제 #22
0
def test_iterable_hyper_parameters_type(sagemaker_session,
                                        iterable_hyper_parameters, value):
    """Constructing KMeans with the given keyword set to ``value`` must raise TypeError.

    ``iterable_hyper_parameters`` is used as the keyword name added to a copy
    of ALL_REQ_ARGS; it and ``value`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    # Build the kwargs outside pytest.raises: a TypeError raised while
    # assembling the dict (e.g. an unhashable key) must not be mistaken for
    # the TypeError expected from the constructor.
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({iterable_hyper_parameters: value})

    with pytest.raises(TypeError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
예제 #23
0
def test_required_hyper_parameters_value(sagemaker_session,
                                         required_hyper_parameters, value):
    """Constructing KMeans with a required argument replaced by ``value`` must raise ValueError.

    ``required_hyper_parameters`` is the key overwritten in a copy of
    ALL_REQ_ARGS; it and ``value`` are presumably supplied by a parametrize
    decorator outside this view.
    """
    # Build the kwargs outside pytest.raises so that only the constructor
    # call itself can satisfy the expected exception.
    test_params = ALL_REQ_ARGS.copy()
    test_params[required_hyper_parameters] = value

    with pytest.raises(ValueError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
예제 #24
0
def test_image(sagemaker_session):
    """train_image() must equal the 'kmeans' registry path with '/kmeans:1' appended."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    actual_image = estimator.train_image()
    assert actual_image == registry(REGION, "kmeans") + "/kmeans:1"
예제 #25
0
def test_image(sagemaker_session):
    """training_image_uri() must match image_uris.retrieve("kmeans", REGION)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    expected_uri = image_uris.retrieve("kmeans", REGION)
    assert expected_uri == estimator.training_image_uri()
예제 #26
0
def test_fit_1p(sagemaker_session):
    """Launch a hyperparameter tuning job for a KMeans estimator.

    Loads the pickled data set under DATA_DIR, configures a KMeans estimator,
    builds 'train' and 'test' record sets from the first 100 entries of
    ``train_set[0]``, and starts a HyperparameterTuner that minimizes
    'test:msd' with at most two jobs, both allowed to run in parallel.
    """
    data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
    # Python 2's pickle.load takes no encoding argument; on Python 3 the
    # file is read with encoding='latin1'.
    pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

    # Load the data into memory as numpy arrays
    # NOTE(review): pickle.load is only acceptable because this reads a
    # fixture from DATA_DIR; never point it at untrusted input.
    with gzip.open(data_path, 'rb') as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(role='SageMakerRole',
                    train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10,
                    sagemaker_session=sagemaker_session,
                    base_job_name='tk',
                    output_path='s3://{}/'.format(
                        sagemaker_session.default_bucket()))

    # set kmeans specific hp
    kmeans.init_method = 'random'
    # Fixed: this was spelled 'max_iterators', which only created an unused
    # attribute instead of setting the max_iterations hyperparameter.
    kmeans.max_iterations = 1
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = 'kmeans++'
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])
    test_records = kmeans.record_set(train_set[0][:100], channel='test')

    # specify which hp you want to optimize over
    hyperparameter_ranges = {
        'extra_center_factor': IntegerParameter(1, 10),
        'mini_batch_size': IntegerParameter(10, 100),
        'epochs': IntegerParameter(1, 2),
        'init_method': CategoricalParameter(['kmeans++', 'random'])
    }
    objective_metric_name = 'test:msd'

    tuner = HyperparameterTuner(estimator=kmeans,
                                objective_metric_name=objective_metric_name,
                                hyperparameter_ranges=hyperparameter_ranges,
                                objective_type='Minimize',
                                max_jobs=2,
                                max_parallel_jobs=2)

    tuner.fit([records, test_records])

    print('Started HPO job with name:' + tuner.latest_tuning_job.name)
예제 #27
0
def test_image(sagemaker_session):
    """train_image() must equal the 'kmeans' registry path with '/kmeans:1' appended."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    actual_image = estimator.train_image()
    assert actual_image == registry(REGION, 'kmeans') + '/kmeans:1'