Example #1
def test_init_sigma_fail_value_high(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(init_sigma=1,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
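Most of these unit tests lean on a shared pytest fixture and two argument dicts, REQ_ARGS and ALL_REQ_ARGS, that the excerpts never show. A minimal sketch of that scaffolding, assuming a mocked session and guessing at the argument names (older SDK releases spell them train_instance_count/train_instance_type):

import pytest
from unittest.mock import Mock

# Hypothetical reconstruction of the shared test scaffolding; the values and
# argument names in the real test module may differ.
REQ_ARGS = {"role": "SageMakerRole", "instance_count": 1, "instance_type": "ml.c4.xlarge"}
ALL_REQ_ARGS = dict(REQ_ARGS, predictor_type="binary_classifier")

@pytest.fixture()
def sagemaker_session():
    # A Mock stands in for sagemaker.Session, so constructing estimators
    # never touches AWS.
    return Mock(name="sagemaker_session", boto_region_name="us-west-2")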
Example #2
def test_async_linear_learner(sagemaker_session):
    training_job_name = ""
    endpoint_name = 'test-linear-learner-async-{}'.format(
        sagemaker_timestamp())

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # Relabel the first 200 examples to form a binary target, then cast
        # the labels to float32 as the algorithm expects.
        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole',
                           1,
                           'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           predictor_type='binary_classifier',
                           sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]),
               wait=False)
        training_job_name = ll.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" %
              training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=35):
        estimator = LinearLearner.attach(training_job_name=training_job_name,
                                         sagemaker_session=sagemaker_session)
        model = LinearLearnerModel(estimator.model_data,
                                   role='SageMakerRole',
                                   sagemaker_session=sagemaker_session)
        predictor = model.deploy(1,
                                 'ml.c4.xlarge',
                                 endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
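Two SDK behaviors make the asynchronous flow above work: fit(..., wait=False) returns as soon as the training job is created, and LinearLearner.attach() reconnects to the named job, blocking until it finishes before the model is built. Each prediction arrives as a RecordIO-protobuf Record whose label field is a map of named tensors; pulling out the raw score looks roughly like this (the tensor layout here is an assumption):

# Hedged sketch: extract the scalar score from the first prediction.
score = result[0].label["score"].float32_tensor.values[0]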
Example #3
def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert lr.train_image() == registry(REGION, 'linear-learner') + '/linear-learner:1'
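The registry() helper used above is the legacy way to build the algorithm's image URI; newer SDK releases route the same lookup through sagemaker.image_uris.retrieve, as Example #19 further down shows. A quick sketch of the modern call:

from sagemaker import image_uris

# Resolves to the regional ECR URI of the first-party linear-learner image.
uri = image_uris.retrieve("linear-learner", "us-west-2")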
Example #4
def test_iterable_hyper_parameters_type(sagemaker_session, iterable_hyper_parameters, value):
    with pytest.raises(TypeError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({iterable_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
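The iterable_hyper_parameters and value arguments are supplied by a pytest.mark.parametrize decorator that this excerpt drops. A hypothetical reconstruction (the actual parameter pairs may differ):

import pytest

@pytest.mark.parametrize(
    "iterable_hyper_parameters, value",
    [("epochs", [1]), ("epochs", (1,))],  # hypothetical pairs: containers are invalid scalar values
)
def test_iterable_hyper_parameters_type(sagemaker_session, iterable_hyper_parameters, value):
    ...  # body as in Example #4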
Example #5
def test_all_hyperparameters(sagemaker_session):
    lr = LinearLearner(
        sagemaker_session=sagemaker_session,
        binary_classifier_model_selection_criteria="accuracy",
        target_recall=0.5,
        target_precision=0.6,
        positive_example_weight_mult=0.1,
        epochs=1,
        use_bias=True,
        num_models=5,
        num_calibration_samples=6,
        init_method="uniform",
        init_scale=0.1,
        init_sigma=0.001,
        init_bias=0,
        optimizer="sgd",
        loss="logistic",
        wd=0.4,
        l1=0.04,
        momentum=0.1,
        learning_rate=0.001,
        beta_1=0.2,
        beta_2=0.03,
        bias_lr_mult=5.5,
        bias_wd_mult=6.6,
        use_lr_scheduler=False,
        lr_scheduler_step=2,
        lr_scheduler_factor=0.03,
        lr_scheduler_minimum_lr=0.001,
        normalize_data=False,
        normalize_label=True,
        unbias_data=True,
        unbias_label=False,
        num_point_for_scaler=3,
        margin=1.0,
        quantile=0.5,
        loss_insensitivity=0.1,
        huber_delta=0.1,
        early_stopping_patience=3,
        early_stopping_tolerance=0.001,
        num_classes=1,
        accuracy_top_k=3,
        f_beta=1.0,
        balance_multiclass_weights=False,
        **ALL_REQ_ARGS
    )

    assert lr.hyperparameters() == dict(
        predictor_type="binary_classifier",
        binary_classifier_model_selection_criteria="accuracy",
        target_recall="0.5",
        target_precision="0.6",
        positive_example_weight_mult="0.1",
        epochs="1",
        use_bias="True",
        num_models="5",
        num_calibration_samples="6",
        init_method="uniform",
        init_scale="0.1",
        init_sigma="0.001",
        init_bias="0.0",
        optimizer="sgd",
        loss="logistic",
        wd="0.4",
        l1="0.04",
        momentum="0.1",
        learning_rate="0.001",
        beta_1="0.2",
        beta_2="0.03",
        bias_lr_mult="5.5",
        bias_wd_mult="6.6",
        use_lr_scheduler="False",
        lr_scheduler_step="2",
        lr_scheduler_factor="0.03",
        lr_scheduler_minimum_lr="0.001",
        normalize_data="False",
        normalize_label="True",
        unbias_data="True",
        unbias_label="False",
        num_point_for_scaler="3",
        margin="1.0",
        quantile="0.5",
        loss_insensitivity="0.1",
        huber_delta="0.1",
        early_stopping_patience="3",
        early_stopping_tolerance="0.001",
        num_classes="1",
        accuracy_top_k="3",
        f_beta="1.0",
        balance_multiclass_weights="False",
    )
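Every numeric or boolean value passed to the constructor comes back from hyperparameters() as a string. That is deliberate: the CreateTrainingJob API accepts hyperparameters only as a string-to-string map, so the estimator serializes each value before submission, e.g.:

assert lr.hyperparameters()["epochs"] == "1"       # int 1, stringified
assert lr.hyperparameters()["use_bias"] == "True"  # bool True, stringified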
Example #7
def test_linear_learner(sagemaker_session, cpu_instance_type, training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set[1][:100] = 1
        training_set[1][100:200] = 0
        training_set = training_set[0], training_set[1].astype(
            np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
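record_set does the quiet heavy lifting in these integration tests: it serializes the numpy feature and label arrays to RecordIO-protobuf, uploads the result to the session's default S3 bucket, and returns a RecordSet pointing at the upload, which fit() then wires in as the train channel:

# Equivalent to the fit call above, with the channel named explicitly.
records = ll.record_set(training_set[0][:200], training_set[1][:200], channel="train")
ll.fit(records, job_name=job_name)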
Example #8
def test_learning_rate_fail_value_high(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(learning_rate=1,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #9
def test_learning_rate_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(learning_rate='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #10
def test_momentum_fail_value_high(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(momentum=1,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #11
def test_momentum_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(momentum='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #12
def test_loss_fail(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(loss='other',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #13
def test_optimizer_fail(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(optimizer='other',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #14
def test_init_sigma_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(init_sigma='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #15
def test_lr_scheduler_minimum_lr_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(lr_scheduler_minimum_lr='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #16
def test_beta_1_fail_value_low(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(beta_1=0,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #17
def test_num_point_for_scaler_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(num_point_for_scaler='other',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #18
def test_beta_2_fail_value_high(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(beta_2=1,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #19
def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert image_uris.retrieve("linear-learner",
                               REGION) == lr.training_image_uri()
Example #20
def test_beta_2_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(beta_2='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #21
def test_num_classes_can_be_string_for_multiclass_classifier(sagemaker_session):
    test_params = ALL_REQ_ARGS.copy()
    test_params["predictor_type"] = "multiclass_classifier"
    test_params["num_classes"] = "3"
    LinearLearner(sagemaker_session=sagemaker_session, **test_params)
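The string "3" passes because each hyperparameter is cast to its declared type (int here) before validation, and num_classes is only required once predictor_type is multiclass_classifier. The documented lower bound is 3 classes, so the mirror-image failure case would look like this (a sketch; the exact error message is not asserted):

with pytest.raises(ValueError):
    test_params = dict(ALL_REQ_ARGS, predictor_type="multiclass_classifier", num_classes=2)
    LinearLearner(sagemaker_session=sagemaker_session, **test_params)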
Example #22
def test_bias_wd_mult_fail_value_low(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(bias_wd_mult=0,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #23
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({optional_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
Example #24
def test_bias_wd_mult_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(bias_wd_mult='blah',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #25
def test_linear_learner():
    with timeout(minutes=15):
        sagemaker_session = sagemaker.Session(boto_session=boto3.Session(
            region_name=REGION))
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole',
                           1,
                           'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.predictor_type = 'binary_classifier'
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name,
                                             sagemaker_session,
                                             minutes=20):

        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
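This example sets every hyperparameter by attribute assignment after construction rather than through constructor kwargs. Both paths go through the same Hyperparameter descriptors, so an assignment is cast and validated immediately, e.g.:

ll.learning_rate = 0.1     # accepted and recorded
ll.learning_rate = "blah"  # raises ValueError, just like the constructor path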
Example #26
def test_lr_scheduler_step_fail_type(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(lr_scheduler_step='other',
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #27
def test_linear_learner(sagemaker_session):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            "ml.c4.2xlarge",
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, "ml.c4.xlarge", endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
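timeout and timeout_and_delete_endpoint_by_name come from the SDK's integration-test helpers: the first aborts the block after the given number of minutes, and the second additionally guarantees the endpoint is torn down on exit, pass or fail. A simplified reconstruction of the cleanup half (the real helper also retries deletion and surfaces logs on failure):

from contextlib import contextmanager

@contextmanager
def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=45):
    with timeout(minutes=minutes):
        try:
            yield
        finally:
            # Best-effort cleanup so failed tests do not leak endpoints.
            sagemaker_session.delete_endpoint(endpoint_name)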
Example #28
def test_lr_scheduler_factor_fail_value_high(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(lr_scheduler_factor=1,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
Example #29
def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params[required_hyper_parameters] = value
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
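As in Example #4, the required_hyper_parameters and value arguments come from a pytest.mark.parametrize decorator omitted here; predictor_type is LinearLearner's only required hyperparameter, so a plausible reconstruction is:

@pytest.mark.parametrize(
    "required_hyper_parameters, value",
    [("predictor_type", "invalid_type")],  # hypothetical pair
)
def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    ...  # body as in Example #29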
Example #30
def test_init_scale_fail_value_low(sagemaker_session):
    with pytest.raises(ValueError):
        LinearLearner(init_scale=-1.01,
                      sagemaker_session=sagemaker_session,
                      **REQ_ARGS)
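All of the validation examples on this page run entirely offline, since construction alone triggers the hyperparameter checks. A minimal standalone exercise under the same assumptions as the fixture sketch at the top (argument names are version-dependent, and some SDK versions consult the session for config defaults):

from unittest.mock import Mock
from sagemaker import LinearLearner

session = Mock(name="sagemaker_session", boto_region_name="us-west-2")
ll = LinearLearner(role="SageMakerRole", instance_count=1, instance_type="ml.c4.xlarge",
                   predictor_type="binary_classifier", sagemaker_session=session)
print(ll.hyperparameters())  # expected: {'predictor_type': 'binary_classifier'}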