def test_model_image(sagemaker_session):
    """After fit(), create_model() should use the first-party PCA image URI."""
    estimator = PCA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    pca_model = estimator.create_model()
    assert pca_model.image_uri == image_uris.retrieve("pca", REGION)
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Omitting mini_batch_size lets PCA derive it from num_records (1 here)."""
    estimator = PCA(base_job_name="pca", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training(train_set)
    assert 1 == estimator.mini_batch_size
def test_call_fit(base_fit, sagemaker_session):
    """fit() must delegate to the base Estimator.fit with (data, mini_batch_size)."""
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    base_fit.assert_called_once()
    positional_args = base_fit.call_args[0]
    assert len(positional_args) == 2
    assert positional_args[0] == train_set
    assert positional_args[1] == MINI_BATCH_SIZE
def test_predictor_type(sagemaker_session):
    """Deploying a trained FM model should hand back a FactorizationMachinesPredictor."""
    estimator = FactorizationMachines(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    fm_model = estimator.create_model()
    deployed = fm_model.deploy(1, TRAIN_INSTANCE_TYPE)
    assert isinstance(deployed, FactorizationMachinesPredictor)
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party FM image."""
    estimator = FactorizationMachines(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    fm_model = estimator.create_model()
    expected_image = registry(REGION, "factorization-machines") + "/factorization-machines:1"
    assert fm_model.image == expected_image
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """Passing a non-integer mini_batch_size to PCA.fit must be rejected."""
    estimator = PCA(base_job_name="pca", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises((TypeError, ValueError)):
        estimator.fit(train_set, "some")
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """FM training prep must succeed when mini_batch_size is omitted."""
    estimator = FactorizationMachines(
        base_job_name="fm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    # No assertion: the test passes as long as no exception is raised.
    estimator._prepare_for_training(train_set)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """LDA training prep must reject a non-integer mini_batch_size."""
    estimator = LDA(base_job_name="lda", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises(ValueError):
        estimator._prepare_for_training(train_set, "some")
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party LDA image."""
    estimator = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    # NOTE(review): MINI_BATCH_SZIE reads like a typo of MINI_BATCH_SIZE, but it
    # is used consistently in the LDA tests, so it presumably matches a
    # module-level constant spelled this way -- confirm and rename file-wide.
    estimator.fit(train_set, MINI_BATCH_SZIE)
    lda_model = estimator.create_model()
    assert lda_model.image == registry(REGION, "lda") + "/lda:1"
def test_call_fit_none_mini_batch_size(sagemaker_session):
    """NTM.fit() must accept being called without an explicit mini_batch_size."""
    estimator = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    # No assertion: the test passes as long as no exception is raised.
    estimator.fit(train_set)
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party NTM image."""
    estimator = NTM(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    ntm_model = estimator.create_model()
    assert ntm_model.image == registry(REGION, "ntm") + "/ntm:1"
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party Object2Vec image."""
    estimator = Object2Vec(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    o2v_model = estimator.create_model()
    assert o2v_model.image == registry(REGION, "object2vec") + "/object2vec:1"
def test_prepare_for_training_wrong_value_lower_mini_batch_size(sagemaker_session):
    """A mini_batch_size below the allowed minimum (0 < 1) must be rejected."""
    estimator = KNN(base_job_name="knn", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises(ValueError):
        estimator._prepare_for_training(train_set, 0)
def test_prepare_for_training_calculate_batch_size_1(sagemaker_session):
    """With a single record, the derived mini_batch_size should collapse to 1."""
    estimator = LinearLearner(
        base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training(train_set)
    assert 1 == estimator.mini_batch_size
def test_prepare_for_training_feature_dim_greater_than_max_allowed(sagemaker_session):
    """A feature_dim beyond MAX_FEATURE_DIM must fail RandomCutForest validation."""
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    oversized = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=MAX_FEATURE_DIM + 1,
        channel="train",
    )
    with pytest.raises((TypeError, ValueError)):
        estimator._prepare_for_training(oversized)
def test_predictor_type(sagemaker_session):
    """Deploying a trained LDA model should hand back an LDAPredictor."""
    estimator = LDA(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    # NOTE(review): MINI_BATCH_SZIE reads like a typo of MINI_BATCH_SIZE, but it
    # is used consistently in the LDA tests, so it presumably matches a
    # module-level constant spelled this way -- confirm and rename file-wide.
    estimator.fit(train_set, MINI_BATCH_SZIE)
    lda_model = estimator.create_model()
    deployed = lda_model.deploy(1, TRAIN_INSTANCE_TYPE)
    assert isinstance(deployed, LDAPredictor)
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party IPInsights image."""
    estimator = IPInsights(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    ipi_model = estimator.create_model()
    assert ipi_model.image == registry(REGION, "ipinsights") + "/ipinsights:1"
def test_prepare_for_training_calculate_batch_size_2(sagemaker_session):
    """With many records, the derived mini_batch_size should cap at the default."""
    estimator = LinearLearner(
        base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=10000,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training(train_set)
    assert DEFAULT_MINI_BATCH_SIZE == estimator.mini_batch_size
def test_predictor_type(sagemaker_session):
    """Object2Vec has no dedicated predictor class; deploy yields RealTimePredictor."""
    estimator = Object2Vec(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    o2v_model = estimator.create_model()
    deployed = o2v_model.deploy(1, TRAIN_INSTANCE_TYPE)
    assert isinstance(deployed, RealTimePredictor)
def test_prepare_for_training_multiple_channel(sagemaker_session):
    """Batch-size derivation must also work when several channels are supplied."""
    estimator = LinearLearner(
        base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=10000,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training([train_set, train_set])
    assert DEFAULT_MINI_BATCH_SIZE == estimator.mini_batch_size
def test_prepare_for_training_multiple_channel(sagemaker_session):
    """PCA batch-size derivation must also work for a list of channels."""
    # Renamed the local from the misleading 'lr' -- this estimator is a PCA.
    estimator = PCA(base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training([train_set, train_set])
    assert 1 == estimator.mini_batch_size
def test_predictor_type(sagemaker_session):
    """Deploying a trained IPInsights model should hand back an IPInsightsPredictor."""
    estimator = IPInsights(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    ipi_model = estimator.create_model()
    deployed = ipi_model.deploy(1, INSTANCE_TYPE)
    assert isinstance(deployed, IPInsightsPredictor)
def test_model_image(sagemaker_session):
    """After fit(), create_model() should point at the first-party LinearLearner image."""
    estimator = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set)
    ll_model = estimator.create_model()
    assert ll_model.image == registry(REGION, "linear-learner") + "/linear-learner:1"
def test_predictor_type(sagemaker_session):
    """Deploying a trained RCF model should hand back a RandomCutForestPredictor."""
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set, MINI_BATCH_SIZE)
    rcf_model = estimator.create_model()
    deployed = rcf_model.deploy(1, INSTANCE_TYPE)
    assert isinstance(deployed, RandomCutForestPredictor)
def test_prepare_for_training_wrong_value_upper_mini_batch_size(sagemaker_session):
    """A mini_batch_size above the IPInsights maximum (500000) must be rejected."""
    estimator = IPInsights(
        base_job_name="ipinsights", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises(ValueError):
        # One past the documented upper bound.
        estimator._prepare_for_training(train_set, 500001)
def test_predictor_type(sagemaker_session):
    """Deploying a trained LinearLearner model should hand back a LinearLearnerPredictor."""
    estimator = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(train_set)
    ll_model = estimator.create_model()
    deployed = ll_model.deploy(1, INSTANCE_TYPE)
    assert isinstance(deployed, LinearLearnerPredictor)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """A non-integer mini_batch_size must be rejected during training prep.

    Bug fix: the original passed 1234, which IS an int, so the test exercised
    (at best) the wrong-*value* range check rather than the wrong-*type* path
    its name promises. Pass a string instead, matching the sibling wrong-type
    tests for the other algorithms, so type validation is what raises.
    """
    randomcutforest = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    with pytest.raises((TypeError, ValueError)):
        randomcutforest._prepare_for_training(data, "some")
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """When omitted, RCF training prep should fall back to the default batch size."""
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training(train_set)
    assert MINI_BATCH_SIZE == estimator.mini_batch_size
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """When omitted, KMeans training prep should default mini_batch_size to 5000."""
    estimator = KMeans(
        base_job_name="kmeans", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )
    train_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator._prepare_for_training(train_set)
    assert 5000 == estimator.mini_batch_size
def test_fit_pca_with_inter_container_traffic_encryption_flag(sagemaker_session, tuner):
    """Tuning a PCA built with encrypt_inter_container_traffic=True must forward the flag."""
    estimator = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        base_job_name="pca",
        sagemaker_session=sagemaker_session,
        encrypt_inter_container_traffic=True,
    )
    tuner.estimator = estimator
    records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(records, mini_batch_size=9999)
    # Inspect the kwargs the mocked session.tune() was invoked with.
    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]
    assert tune_kwargs["job_name"].startswith("pca")
    assert tune_kwargs["encrypt_inter_container_traffic"] is True