def test_model_image(sagemaker_session):
    """The model produced by a fitted estimator points at the KMeans image for the region."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_data = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    estimator.fit(train_data, MINI_BATCH_SIZE)

    expected_image = registry(REGION, 'kmeans') + '/kmeans:1'
    assert estimator.create_model().image == expected_image
def test_all_hyperparameters(sagemaker_session):
    """Every optional hyperparameter is mapped to its SageMaker name and stringified."""
    estimator = KMeans(
        sagemaker_session=sagemaker_session,
        init_method='random',
        max_iterations=3,
        tol=0.5,
        num_trials=5,
        local_init_method='kmeans++',
        half_life_time_size=0,
        epochs=10,
        center_factor=2,
        eval_metrics=['msd', 'ssd'],
        **ALL_REQ_ARGS
    )

    expected = {
        'k': str(ALL_REQ_ARGS['k']),
        'init_method': 'random',
        'local_lloyd_max_iter': '3',
        'local_lloyd_tol': '0.5',
        'local_lloyd_num_trials': '5',
        'local_lloyd_init_method': 'kmeans++',
        'half_life_time_size': '0',
        'epochs': '10',
        'extra_center_factor': '2',
        'eval_metrics': '[\'msd\', \'ssd\']',
        'force_dense': 'True',
    }
    assert estimator.hyperparameters() == expected
def test_prepare_for_training_wrong_value_mini_batch_size(sagemaker_session):
    """A non-positive mini_batch_size passed to _prepare_for_training raises ValueError."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    with pytest.raises(ValueError):
        estimator._prepare_for_training(records, 0)
def test_call_fit_wrong_value_mini_batch_size(sagemaker_session):
    """fit() rejects a non-positive mini_batch_size with ValueError."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    with pytest.raises(ValueError):
        estimator.fit(records, 0)
def test_all_hyperparameters(sagemaker_session):
    """Every optional hyperparameter is mapped to its SageMaker name and stringified."""
    estimator = KMeans(
        sagemaker_session=sagemaker_session,
        init_method="random",
        max_iterations=3,
        tol=0.5,
        num_trials=5,
        local_init_method="kmeans++",
        half_life_time_size=0,
        epochs=10,
        center_factor=2,
        eval_metrics=["msd", "ssd"],
        **ALL_REQ_ARGS
    )

    expected = {
        "k": str(ALL_REQ_ARGS["k"]),
        "init_method": "random",
        "local_lloyd_max_iter": "3",
        "local_lloyd_tol": "0.5",
        "local_lloyd_num_trials": "5",
        "local_lloyd_init_method": "kmeans++",
        "half_life_time_size": "0",
        "epochs": "10",
        "extra_center_factor": "2",
        "eval_metrics": '["msd", "ssd"]',
        "force_dense": "True",
    }
    assert estimator.hyperparameters() == expected
def test_predictor_type(sagemaker_session):
    """Deploying the created model returns a KMeansPredictor instance."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    model = estimator.create_model()
    predictor = model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(predictor, KMeansPredictor)
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """When mini_batch_size is omitted, _prepare_for_training falls back to the 5000 default."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )

    estimator._prepare_for_training(records)

    assert estimator.mini_batch_size == 5000
def test_call_fit(base_fit, sagemaker_session):
    """fit() delegates exactly once to the base fit with the record set and batch size."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )

    estimator.fit(records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    positional = base_fit.call_args[0]
    assert len(positional) == 2
    assert positional[0] == records
    assert positional[1] == MINI_BATCH_SIZE
def test_prepare_for_training_wrong_value_mini_batch_size(sagemaker_session):
    """A non-positive mini_batch_size passed to _prepare_for_training raises ValueError."""
    estimator = KMeans(base_job_name="kmeans", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises(ValueError):
        estimator._prepare_for_training(records, 0)
def test_model_image(sagemaker_session):
    """The model produced by a fitted estimator uses the retrieved KMeans image URI."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    model = estimator.create_model()
    assert model.image_uri == image_uris.retrieve("kmeans", REGION)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """A non-integer mini_batch_size is rejected with TypeError or ValueError."""
    estimator = KMeans(base_job_name='kmeans', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        's3://{}/{}'.format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel='train',
    )
    with pytest.raises((TypeError, ValueError)):
        estimator._prepare_for_training(records, 'some')
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """When mini_batch_size is omitted, _prepare_for_training falls back to the 5000 default."""
    estimator = KMeans(base_job_name="kmeans", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    estimator._prepare_for_training(records)

    assert estimator.mini_batch_size == 5000
def test_init_required_named(sagemaker_session):
    """Keyword-argument construction exposes the required attributes (legacy train_* API)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert estimator.role == COMMON_TRAIN_ARGS['role']
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == COMMON_TRAIN_ARGS['train_instance_type']
    assert estimator.k == ALL_REQ_ARGS['k']
def test_init_required_named(sagemaker_session):
    """Keyword-argument construction exposes the required attributes (instance_* API)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert estimator.role == COMMON_TRAIN_ARGS["role"]
    assert estimator.instance_count == INSTANCE_COUNT
    assert estimator.instance_type == COMMON_TRAIN_ARGS["instance_type"]
    assert estimator.k == ALL_REQ_ARGS["k"]
def test_init_required_positional(sagemaker_session):
    """Positional construction stores role, instance count/type, and k as given."""
    estimator = KMeans(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        K,
        sagemaker_session=sagemaker_session,
    )

    assert estimator.role == ROLE
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == TRAIN_INSTANCE_TYPE
    assert estimator.k == K
def test_predictor_custom_serialization(sagemaker_session):
    """deploy() honors caller-supplied serializer and deserializer objects."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records, MINI_BATCH_SIZE)
    model = estimator.create_model()

    my_serializer = Mock()
    my_deserializer = Mock()
    predictor = model.deploy(
        1,
        INSTANCE_TYPE,
        serializer=my_serializer,
        deserializer=my_deserializer,
    )

    assert isinstance(predictor, KMeansPredictor)
    assert predictor.serializer is my_serializer
    assert predictor.deserializer is my_deserializer
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    """An out-of-range value for an optional hyperparameter raises ValueError at construction.

    The test params are built *before* entering ``pytest.raises`` so that an
    unexpected error during setup (e.g. a failing ``copy``/``update``) cannot
    be mistaken for the expected validation failure — only the constructor
    call may satisfy the raises block.
    """
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({optional_hyper_parameters: value})
    with pytest.raises(ValueError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
def test_iterable_hyper_parameters_type(sagemaker_session, iterable_hyper_parameters, value):
    """A wrongly-typed iterable hyperparameter raises TypeError at construction.

    Setup happens *before* ``pytest.raises`` so only the KMeans constructor —
    the operation under test — can satisfy the expected-exception block.
    """
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({iterable_hyper_parameters: value})
    with pytest.raises(TypeError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    """An invalid value for a required hyperparameter raises ValueError at construction.

    Setup happens *before* ``pytest.raises`` so only the KMeans constructor —
    the operation under test — can satisfy the expected-exception block.
    """
    test_params = ALL_REQ_ARGS.copy()
    test_params[required_hyper_parameters] = value
    with pytest.raises(ValueError):
        KMeans(sagemaker_session=sagemaker_session, **test_params)
def test_image(sagemaker_session):
    """train_image() resolves to the regional KMeans registry image (legacy API)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    expected = registry(REGION, "kmeans") + "/kmeans:1"
    assert estimator.train_image() == expected
def test_image(sagemaker_session):
    """training_image_uri() matches the image_uris lookup for the region."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert estimator.training_image_uri() == image_uris.retrieve("kmeans", REGION)
def test_fit_1p(sagemaker_session):
    """Run a small KMeans hyperparameter tuning job against the 1P MNIST dataset.

    Fix: the original set ``kmeans.max_iterators = 1`` — a typo that silently
    created an unused attribute instead of configuring the ``max_iterations``
    hyperparameter. Corrected to ``max_iterations``.
    """
    data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
    # Python 2 pickles need no encoding arg; Python 3 must decode latin1 bytes.
    pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

    # Load the data into memory as numpy arrays
    with gzip.open(data_path, 'rb') as f:
        train_set, _, _ = pickle.load(f, **pickle_args)

    kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
                    train_instance_type='ml.c4.xlarge',
                    k=10, sagemaker_session=sagemaker_session, base_job_name='tk',
                    output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

    # set kmeans specific hp
    kmeans.init_method = 'random'
    kmeans.max_iterations = 1  # was "max_iterators": the typo left the hp at its default
    kmeans.tol = 1
    kmeans.num_trials = 1
    kmeans.local_init_method = 'kmeans++'
    kmeans.half_life_time_size = 1
    kmeans.epochs = 1

    records = kmeans.record_set(train_set[0][:100])
    test_records = kmeans.record_set(train_set[0][:100], channel='test')

    # specify which hp you want to optimize over
    hyperparameter_ranges = {
        'extra_center_factor': IntegerParameter(1, 10),
        'mini_batch_size': IntegerParameter(10, 100),
        'epochs': IntegerParameter(1, 2),
        'init_method': CategoricalParameter(['kmeans++', 'random']),
    }
    objective_metric_name = 'test:msd'

    tuner = HyperparameterTuner(estimator=kmeans,
                                objective_metric_name=objective_metric_name,
                                hyperparameter_ranges=hyperparameter_ranges,
                                objective_type='Minimize',
                                max_jobs=2,
                                max_parallel_jobs=2)

    tuner.fit([records, test_records])
    print('Started HPO job with name:' + tuner.latest_tuning_job.name)
def test_image(sagemaker_session):
    """train_image() resolves to the regional KMeans registry image (legacy API)."""
    estimator = KMeans(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    expected = registry(REGION, 'kmeans') + '/kmeans:1'
    assert estimator.train_image() == expected