def test_model_image(sagemaker_session):
    """Verify the image of a model created from a fitted estimator.

    The estimator is fitted on a single-record ``RecordSet`` and the model's
    ``image`` must equal the ``registry`` path for ``REGION`` followed by
    ``/randomcutforest:1``.
    """
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)

    rcf_model = estimator.create_model()
    actual_image = rcf_model.image
    expected_image = registry(REGION, "randomcutforest") + "/randomcutforest:1"
    assert actual_image == expected_image
def test_model_image(sagemaker_session):
    """Fit the estimator, create a model and compare its ``image`` attribute.

    The expected value is ``registry(REGION, "randomcutforest")`` with the
    ``/randomcutforest:1`` suffix appended.
    """
    rcf = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf.fit(record_set, MINI_BATCH_SIZE)

    created = rcf.create_model()
    assert created.image == registry(REGION, "randomcutforest") + "/randomcutforest:1"
def test_predictor_type(sagemaker_session):
    """Deploying the created model must return a ``RandomCutForestPredictor``."""
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)

    # Deploy with an instance count of 1 on the shared test instance type.
    deployed = estimator.create_model().deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(deployed, RandomCutForestPredictor)
def test_predictor_type(sagemaker_session):
    """Check the type of the predictor produced by ``model.deploy``.

    The estimator is fitted first; the model it creates is then deployed and
    the result must be an instance of ``RandomCutForestPredictor``.
    """
    rcf = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf.fit(record_set, MINI_BATCH_SIZE)
    rcf_model = rcf.create_model()
    endpoint_predictor = rcf_model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(endpoint_predictor, RandomCutForestPredictor)
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Call ``_prepare_for_training`` without a mini-batch size argument.

    Afterwards the estimator's ``mini_batch_size`` must equal
    ``MINI_BATCH_SIZE``.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )

    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator._prepare_for_training(records)

    assert estimator.mini_batch_size == MINI_BATCH_SIZE
def test_prepare_for_training_feature_dim_greater_than_max_allowed(sagemaker_session):
    """A record set with ``feature_dim = MAX_FEATURE_DIM + 1`` must be rejected.

    ``_prepare_for_training`` is expected to raise ``TypeError`` or
    ``ValueError`` for such input.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )

    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    oversized_dim = MAX_FEATURE_DIM + 1
    records = RecordSet(train_uri, num_records=1, feature_dim=oversized_dim, channel="train")

    accepted_errors = (TypeError, ValueError)
    with pytest.raises(accepted_errors):
        estimator._prepare_for_training(records)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """Passing ``1234`` as the mini-batch size must raise.

    ``_prepare_for_training`` is expected to raise ``TypeError`` or
    ``ValueError`` for this value.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )

    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")

    accepted_errors = (TypeError, ValueError)
    with pytest.raises(accepted_errors):
        estimator._prepare_for_training(records, 1234)
def test_prepare_for_training_no_mini_batch_size(sagemaker_session):
    """Omit the mini-batch size and check the value left on the estimator.

    After ``_prepare_for_training(data)`` the ``mini_batch_size`` attribute is
    compared against ``MINI_BATCH_SIZE``.
    """
    rcf = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )

    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    rcf._prepare_for_training(record_set)

    assert rcf.mini_batch_size == MINI_BATCH_SIZE
def test_all_hyperparameters(sagemaker_session):
    """``hyperparameters()`` must report every configured value as a string.

    ``num_trees`` and ``num_samples_per_tree`` are compared against their
    ``str()`` form, ``eval_metrics`` against ``"{}".format(EVAL_METRICS)``.
    """
    estimator = RandomCutForest(
        sagemaker_session=sagemaker_session,
        num_trees=NUM_TREES,
        num_samples_per_tree=NUM_SAMPLES_PER_TREE,
        eval_metrics=EVAL_METRICS,
        **ALL_REQ_ARGS
    )
    reported = estimator.hyperparameters()
    expected = {
        "num_samples_per_tree": str(NUM_SAMPLES_PER_TREE),
        "num_trees": str(NUM_TREES),
        "eval_metrics": "{}".format(EVAL_METRICS),
    }
    assert reported == expected
def test_prepare_for_training_feature_dim_greater_than_max_allowed(sagemaker_session):
    """Feature dimension one above ``MAX_FEATURE_DIM`` must make preparation fail.

    Either ``TypeError`` or ``ValueError`` satisfies the test.
    """
    rcf = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )

    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=MAX_FEATURE_DIM + 1,
        channel="train",
    )

    with pytest.raises((TypeError, ValueError)):
        rcf._prepare_for_training(record_set)
def test_prepare_for_training_wrong_type_mini_batch_size(sagemaker_session):
    """``_prepare_for_training(data, 1234)`` must raise ``TypeError`` or ``ValueError``."""
    rcf = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )

    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    with pytest.raises((TypeError, ValueError)):
        rcf._prepare_for_training(record_set, 1234)
def test_all_hyperparameters(sagemaker_session):
    """Compare ``hyperparameters()`` with the stringified constructor arguments."""
    hyperparameter_kwargs = dict(
        num_trees=NUM_TREES,
        num_samples_per_tree=NUM_SAMPLES_PER_TREE,
        eval_metrics=EVAL_METRICS,
    )
    rcf = RandomCutForest(
        sagemaker_session=sagemaker_session, **dict(hyperparameter_kwargs, **ALL_REQ_ARGS)
    )
    assert rcf.hyperparameters() == {
        "num_samples_per_tree": str(NUM_SAMPLES_PER_TREE),
        "num_trees": str(NUM_TREES),
        "eval_metrics": "{}".format(EVAL_METRICS),
    }
Example #13
0
def test_call_fit_none_mini_batch_size(sagemaker_session):
    """Smoke test: ``fit`` is called with the record set only.

    There is no assertion; the test passes as long as ``fit(data)`` does not
    raise.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )

    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records)
def test_call_fit(base_fit, sagemaker_session):
    """``fit`` must forward the record set and mini-batch size positionally.

    ``base_fit`` is presumably a mock patched over the base-class ``fit``
    (its definition is not visible here); it must be called exactly once with
    ``(data, MINI_BATCH_SIZE)`` as positional arguments.
    """
    estimator = RandomCutForest(
        base_job_name="randomcutforest", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS
    )

    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")

    estimator.fit(records, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    # call_args[0] holds the positional arguments of the recorded call.
    positional_args = base_fit.call_args[0]
    assert len(positional_args) == 2
    assert positional_args[0] == records
    assert positional_args[1] == MINI_BATCH_SIZE
Example #15
0
def test_model_image(sagemaker_session):
    """The created model's ``image_uri`` must match ``image_uris.retrieve``.

    The estimator is fitted on a single-record ``RecordSet`` before the model
    is created.
    """
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    train_uri = "s3://{}/{}".format(BUCKET_NAME, PREFIX)
    records = RecordSet(train_uri, num_records=1, feature_dim=FEATURE_DIM, channel="train")
    estimator.fit(records, MINI_BATCH_SIZE)

    rcf_model = estimator.create_model()
    expected_uri = image_uris.retrieve("randomcutforest", REGION)
    assert expected_uri == rcf_model.image_uri
def test_call_fit(base_fit, sagemaker_session):
    """Check the arguments with which ``base_fit`` is invoked by ``fit``.

    ``base_fit`` must have been called once, with exactly two positional
    arguments: the record set and ``MINI_BATCH_SIZE``.
    """
    rcf = RandomCutForest(
        base_job_name="randomcutforest",
        sagemaker_session=sagemaker_session,
        **ALL_REQ_ARGS
    )

    record_set = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    rcf.fit(record_set, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    forwarded = base_fit.call_args[0]
    assert len(forwarded) == 2
    assert forwarded[0] == record_set
    assert forwarded[1] == MINI_BATCH_SIZE
def test_all_hyperparameters(sagemaker_session):
    """``hyperparameters()`` must return the configured values in string form.

    ``eval_metrics`` is compared against a literal JSON-style list string
    rather than a value derived from ``EVAL_METRICS``.
    """
    estimator = RandomCutForest(
        sagemaker_session=sagemaker_session,
        num_trees=NUM_TREES,
        num_samples_per_tree=NUM_SAMPLES_PER_TREE,
        eval_metrics=EVAL_METRICS,
        **ALL_REQ_ARGS
    )
    reported = estimator.hyperparameters()
    expected = {
        "num_samples_per_tree": str(NUM_SAMPLES_PER_TREE),
        "num_trees": str(NUM_TREES),
        "eval_metrics": '["accuracy", "precision_recall_fscore"]',
    }
    assert reported == expected
def test_init_required_named(sagemaker_session):
    """Construct the estimator from keyword arguments and check its attributes.

    ``role`` and ``instance_type`` are compared with the matching entries of
    ``COMMON_TRAIN_ARGS``; ``instance_count`` with ``INSTANCE_COUNT``.
    """
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert estimator.role == COMMON_TRAIN_ARGS["role"]
    assert estimator.instance_count == INSTANCE_COUNT
    assert estimator.instance_type == COMMON_TRAIN_ARGS["instance_type"]
def test_init_required_positional(sagemaker_session):
    """Construct the estimator from positional arguments and check each attribute.

    The positional order used is role, instance count, instance type, samples
    per tree, number of trees, eval metrics.
    """
    positional = (
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_SAMPLES_PER_TREE,
        NUM_TREES,
        EVAL_METRICS,
    )
    estimator = RandomCutForest(*positional, sagemaker_session=sagemaker_session)

    assert estimator.role == ROLE
    assert estimator.train_instance_count == TRAIN_INSTANCE_COUNT
    assert estimator.train_instance_type == TRAIN_INSTANCE_TYPE
    assert estimator.num_trees == NUM_TREES
    assert estimator.num_samples_per_tree == NUM_SAMPLES_PER_TREE
    assert estimator.eval_metrics == EVAL_METRICS
Example #20
0
def __rcf_training_job(
    sagemaker_session, container_image, cpu_instance_type, num_trees, num_samples_per_tree
):
    """Train a RandomCutForest on random data and return its model.

    Args:
        sagemaker_session: Session handed to the ``RandomCutForest`` estimator.
        container_image: Value assigned to the returned model's ``image_uri``.
        cpu_instance_type: Instance type used for the training job.
        num_trees: ``num_trees`` hyperparameter for the estimator.
        num_samples_per_tree: ``num_samples_per_tree`` hyperparameter.

    Returns:
        The model from ``create_model()`` with ``image_uri`` overwritten by
        ``container_image``.
    """
    training_job_name = unique_name_from_base("randomcutforest")
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # Random training matrix: 1000 rows, 14 features per row.
        sample_count, dimension = 1000, 14
        training_matrix = numpy.random.rand(sample_count, dimension)

        estimator = RandomCutForest(
            role=ROLE,
            instance_count=1,
            instance_type=cpu_instance_type,
            num_trees=num_trees,
            num_samples_per_tree=num_samples_per_tree,
            eval_metrics=["accuracy", "precision_recall_fscore"],
            sagemaker_session=sagemaker_session,
        )

        training_records = estimator.record_set(training_matrix)
        estimator.fit(records=training_records, job_name=training_job_name)

        # The frameworks do not support multi-model container images yet, so
        # the image on the created model is swapped for the one supplied.
        trained_model = estimator.create_model()
        trained_model.image_uri = container_image
        return trained_model
Example #21
0
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    """An invalid value for an optional hyperparameter must raise ``ValueError``.

    Args:
        sagemaker_session: Session passed to the estimator constructor.
        optional_hyper_parameters: Name of the hyperparameter to override
            (presumably supplied by a parametrize decorator not visible here).
        value: The value the constructor is expected to reject.
    """
    # Build the arguments outside the raises block so that only the
    # constructor call can satisfy the expected exception.
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({optional_hyper_parameters: value})

    with pytest.raises(ValueError):
        RandomCutForest(sagemaker_session=sagemaker_session, **test_params)
Example #22
0
def test_iterable_hyper_parameters_type(sagemaker_session, iterable_hyper_parameters, value):
    """A wrongly typed iterable hyperparameter must raise ``TypeError``.

    Args:
        sagemaker_session: Session passed to the estimator constructor.
        iterable_hyper_parameters: Name of the hyperparameter to override
            (presumably supplied by a parametrize decorator not visible here).
        value: The value the constructor is expected to reject.
    """
    # Build the arguments outside the raises block so that only the
    # constructor call can satisfy the expected exception.
    test_params = ALL_REQ_ARGS.copy()
    test_params.update({iterable_hyper_parameters: value})

    with pytest.raises(TypeError):
        RandomCutForest(sagemaker_session=sagemaker_session, **test_params)
Example #23
0
def test_image(sagemaker_session):
    """``training_image_uri()`` must match ``image_uris.retrieve`` for ``REGION``."""
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    expected_uri = image_uris.retrieve("randomcutforest", REGION)
    assert expected_uri == estimator.training_image_uri()
def test_image(sagemaker_session):
    """``train_image()`` must equal the registry path plus ``/randomcutforest:1``."""
    estimator = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    actual_image = estimator.train_image()
    expected_image = registry(REGION, "randomcutforest") + "/randomcutforest:1"
    assert actual_image == expected_image
def test_image(sagemaker_session):
    """Compare the estimator's training image with the expected registry path.

    The expected value is ``registry(REGION, "randomcutforest")`` followed by
    ``/randomcutforest:1``.
    """
    rcf = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    training_image = rcf.train_image()
    registry_prefix = registry(REGION, "randomcutforest")
    assert training_image == registry_prefix + "/randomcutforest:1"