def __rcf_training_job(
    sagemaker_session, container_image, cpu_instance_type, num_trees, num_samples_per_tree
):
    """Train a RandomCutForest estimator on random data and return its model.

    The returned model has its ``image_uri`` overwritten with
    ``container_image``.

    Args:
        sagemaker_session: Session handed to the ``RandomCutForest`` estimator.
        container_image: Image URI assigned to the created model.
        cpu_instance_type: Instance type used for the training job.
        num_trees: Forwarded to ``RandomCutForest`` as ``num_trees``.
        num_samples_per_tree: Forwarded to ``RandomCutForest`` as
            ``num_samples_per_tree``.

    Returns:
        The object produced by ``rcf.create_model()`` with ``image_uri``
        replaced by ``container_image``.
    """
    training_job_name = unique_name_from_base("randomcutforest")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # 1000 random datapoints, each with 14 features.
        samples = numpy.random.rand(1000, 14)

        estimator = RandomCutForest(
            role=ROLE,
            instance_count=1,
            instance_type=cpu_instance_type,
            num_trees=num_trees,
            num_samples_per_tree=num_samples_per_tree,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )
        training_records = estimator.record_set(samples)
        estimator.fit(records=training_records, job_name=training_job_name)

        # Swap in the multi-model container image for now, because the
        # frameworks do not support multi-model container images yet.
        trained_model = estimator.create_model()
        trained_model.image_uri = container_image
        return trained_model
def test_model_image(sagemaker_session):
    """The model created after ``fit`` exposes the expected randomcutforest image."""
    # NOTE(review): a test with this same name is defined again later in this
    # file; the later definition replaces this one at import time.
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_location = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(
        s3_location,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    rcf_model = estimator.create_model()
    assert rcf_model.image == registry(REGION, "randomcutforest") + "/randomcutforest:1"
def test_model_image(sagemaker_session):
    """Check the image reported by a model built from a fitted estimator."""
    # NOTE(review): another ``test_model_image`` is defined later in this file
    # and will shadow this definition when the module is imported.
    rcf = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_channel = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf.fit(train_channel, MINI_BATCH_SIZE)

    created = rcf.create_model()
    assert created.image == registry(REGION, "randomcutforest") + "/randomcutforest:1"
def test_predictor_type(sagemaker_session):
    """Deploying the created model yields a ``RandomCutForestPredictor``."""
    # NOTE(review): a test with this same name is defined again later in this
    # file; the later definition replaces this one at import time.
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_location = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(
        s3_location,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records, MINI_BATCH_SIZE)

    rcf_model = estimator.create_model()
    deployed = rcf_model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(deployed, RandomCutForestPredictor)
def test_predictor_type(sagemaker_session):
    """Verify the type of the predictor returned by ``model.deploy``."""
    rcf = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_channel = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf.fit(train_channel, MINI_BATCH_SIZE)

    # Deploy a single instance of the fitted model and inspect the result.
    endpoint_predictor = rcf.create_model().deploy(1, TRAIN_INSTANCE_TYPE)
    assert isinstance(endpoint_predictor, RandomCutForestPredictor)
def test_call_fit_none_mini_batch_size(sagemaker_session):
    """Call ``fit`` with only a record set and no mini-batch size.

    There is no explicit assertion; the test passes as long as ``fit`` does
    not raise.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    s3_location = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(
        s3_location,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    estimator.fit(records)
def test_model_image(sagemaker_session):
    """The created model's ``image_uri`` matches the retrieved randomcutforest URI."""
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    s3_location = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(s3_location, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)

    rcf_model = estimator.create_model()
    expected_uri = image_uris.retrieve("randomcutforest", REGION)
    assert expected_uri == rcf_model.image_uri
def test_call_fit_feature_dim_greater_than_max_allowed(sagemaker_session):
    """``fit`` raises when the record set's feature_dim is MAX_FEATURE_DIM + 1."""
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    oversized_records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=MAX_FEATURE_DIM + 1,
        channel="train",
    )

    # Either exception type is accepted as a rejection of the input.
    accepted_errors = (TypeError, ValueError)
    with pytest.raises(accepted_errors):
        estimator.fit(oversized_records)
def test_call_fit_wrong_type_mini_batch_size(sagemaker_session):
    """``fit`` raises TypeError or ValueError when given 1234 as the mini-batch size."""
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    records = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    rejected_batch_size = 1234
    with pytest.raises((TypeError, ValueError)):
        estimator.fit(records, rejected_batch_size)
def test_call_fit(base_fit, sagemaker_session):
    """``fit`` forwards the record set and mini-batch size to ``base_fit``."""
    # NOTE(review): a test with this same name is defined again later in this
    # file; the later definition replaces this one at import time.
    estimator = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    s3_location = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(
        s3_location,
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    estimator.fit(records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    # call_args[0] holds the positional arguments of the recorded call.
    positional = base_fit.call_args[0]
    assert len(positional) == 2
    assert positional[0] == records
    assert positional[1] == MINI_BATCH_SIZE
def test_call_fit(base_fit, sagemaker_session):
    """Check that ``base_fit`` is called once with exactly (data, MINI_BATCH_SIZE)."""
    rcf = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )
    train_channel = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf.fit(train_channel, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    # Index 0 of call_args is the tuple of positional arguments.
    forwarded_args = base_fit.call_args[0]
    assert len(forwarded_args) == 2
    assert forwarded_args[0] == train_channel
    assert forwarded_args[1] == MINI_BATCH_SIZE