Example #1
def test_all_hyperparameters(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session,
                       binary_classifier_model_selection_criteria='accuracy',
                       target_recall=0.5, target_precision=0.6,
                       positive_example_weight_mult=0.1, epochs=1, use_bias=True, num_models=5,
                       num_calibration_samples=6, init_method='uniform', init_scale=0.1, init_sigma=0.001,
                       init_bias=0, optimizer='sgd', loss='logistic', wd=0.4, l1=0.04, momentum=0.1,
                       learning_rate=0.001, beta_1=0.2, beta_2=0.03, bias_lr_mult=5.5, bias_wd_mult=6.6,
                       use_lr_scheduler=False, lr_scheduler_step=2, lr_scheduler_factor=0.03,
                       lr_scheduler_minimum_lr=0.001, normalize_data=False, normalize_label=True,
                       unbias_data=True, unbias_label=False, num_point_for_scaler=3, margin=1.0,
                       quantile=0.5, loss_insensitivity=0.1, huber_delta=0.1, early_stopping_patience=3,
                       early_stopping_tolerance=0.001, num_classes=1, accuracy_top_k=3, f_beta=1.0,
                       balance_multiclass_weights=False, **ALL_REQ_ARGS)

    assert lr.hyperparameters() == dict(
        predictor_type='binary_classifier', binary_classifier_model_selection_criteria='accuracy',
        target_recall='0.5', target_precision='0.6', positive_example_weight_mult='0.1', epochs='1',
        use_bias='True', num_models='5', num_calibration_samples='6', init_method='uniform',
        init_scale='0.1', init_sigma='0.001', init_bias='0.0', optimizer='sgd', loss='logistic',
        wd='0.4', l1='0.04', momentum='0.1', learning_rate='0.001', beta_1='0.2', beta_2='0.03',
        bias_lr_mult='5.5', bias_wd_mult='6.6', use_lr_scheduler='False', lr_scheduler_step='2',
        lr_scheduler_factor='0.03', lr_scheduler_minimum_lr='0.001', normalize_data='False',
        normalize_label='True', unbias_data='True', unbias_label='False', num_point_for_scaler='3', margin='1.0',
        quantile='0.5', loss_insensitivity='0.1', huber_delta='0.1', early_stopping_patience='3',
        early_stopping_tolerance='0.001', num_classes='1', accuracy_top_k='3', f_beta='1.0',
        balance_multiclass_weights='False',
    )
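These snippets are collected from several versions of the SageMaker Python SDK test suite, so some use the v1 names (train_instance_count, train_image(), registry()) while others use the v2 names (instance_count, training_image_uri(), image_uris.retrieve()). All of them assume module-level constants and a mocked sagemaker_session fixture defined elsewhere in the test module. A minimal sketch of that shared preamble, with illustrative names and values (assumptions, not the suite's exact definitions):

from unittest.mock import Mock

import pytest
from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerPredictor

# Illustrative values; the real test modules define their own.
ROLE = "myrole"
TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = "ml.c4.xlarge"
PREDICTOR_TYPE = "binary_classifier"
BUCKET_NAME = "mybucket"
PREFIX = "prefix"
FEATURE_DIM = 10
DEFAULT_MINI_BATCH_SIZE = 1000

ALL_REQ_ARGS = dict(
    role=ROLE,
    train_instance_count=TRAIN_INSTANCE_COUNT,  # "instance_count" in SDK v2
    train_instance_type=TRAIN_INSTANCE_TYPE,    # "instance_type" in SDK v2
    predictor_type=PREDICTOR_TYPE,
)


@pytest.fixture()
def sagemaker_session():
    # Mocked session so the unit tests make no AWS calls.
    session = Mock(region_name="us-west-2")
    session.default_bucket.return_value = BUCKET_NAME
    return session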
Example #2
def test_model_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')
    lr.fit(data)

    model = lr.create_model()
    assert model.image == registry(REGION, 'linear-learner') + '/linear-learner:1'
Example #3
def test_predictor_type(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')
    lr.fit(data)
    model = lr.create_model()
    predictor = model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(predictor, LinearLearnerPredictor)
Example #4
def test_prepare_for_training_calculate_batch_size_1(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1, feature_dim=FEATURE_DIM, channel='train')

    lr._prepare_for_training(data)

    assert lr.mini_batch_size == 1
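Examples #4, #5, and #8 exercise the default mini-batch-size rule that _prepare_for_training applies. A paraphrased sketch of that rule, assuming DEFAULT_MINI_BATCH_SIZE is 1000 (the actual logic lives inside the SDK's LinearLearner._prepare_for_training):

def calculated_mini_batch_size(num_records, instance_count, default=1000):
    # Cap at the default, but never exceed the per-instance record count.
    return min(default, max(1, num_records // instance_count))


assert calculated_mini_batch_size(1, 1) == 1          # Example #4
assert calculated_mini_batch_size(10000, 1) == 1000   # Examples #5 and #8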
Example #5
def test_prepare_for_training_multiple_channel(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX),
                     num_records=10000,
                     feature_dim=FEATURE_DIM,
                     channel='train')

    lr._prepare_for_training([data, data])

    assert lr.mini_batch_size == DEFAULT_MINI_BATCH_SIZE
Example #6
def test_prepare_for_training_multiple_channel_no_train(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX),
                     num_records=10000,
                     feature_dim=FEATURE_DIM,
                     channel='mock')

    with pytest.raises(ValueError) as ex:
        lr._prepare_for_training([data, data])

    assert 'Must provide train channel.' in str(ex.value)
Example #7
def test_model_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    lr.fit(data)

    model = lr.create_model()
    assert image_uris.retrieve("linear-learner", REGION) == model.image_uri
Example #8
def test_prepare_for_training_calculate_batch_size_2(sagemaker_session):
    lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=10000,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    lr._prepare_for_training(data)

    assert lr.mini_batch_size == DEFAULT_MINI_BATCH_SIZE
Example #9
def test_call_fit_pass_batch_size(base_fit, sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX),
                     num_records=10000,
                     feature_dim=FEATURE_DIM,
                     channel='train')

    lr.fit(data, 10)

    base_fit.assert_called_once()
    assert len(base_fit.call_args[0]) == 2
    assert base_fit.call_args[0][0] == data
    assert base_fit.call_args[0][1] == 10
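Here base_fit is a mock of the parent estimator's fit method, injected by a patch decorator that the listing omits. A sketch of the usual wiring; the exact patch target is an assumption:

from unittest.mock import patch


@patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit")
def test_call_fit_pass_batch_size(base_fit, sagemaker_session):
    ...  # body as shown above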
Example #10
def test_init_required_positional(sagemaker_session):
    lr = LinearLearner(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, PREDICTOR_TYPE,
                       sagemaker_session=sagemaker_session)
    assert lr.role == ROLE
    assert lr.train_instance_count == TRAIN_INSTANCE_COUNT
    assert lr.train_instance_type == TRAIN_INSTANCE_TYPE
    assert lr.predictor_type == PREDICTOR_TYPE
Example #11
def test_num_classes_is_required_for_multiclass_classifier(sagemaker_session):
    with pytest.raises(ValueError) as excinfo:
        test_params = ALL_REQ_ARGS.copy()
        test_params["predictor_type"] = 'multiclass_classifier'
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
    assert "For predictor_type 'multiclass_classifier', 'num_classes' should be set to a value greater than 2." in str(
        excinfo.value)
Example #12
def test_init_required_named(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert lr.role == ALL_REQ_ARGS["role"]
    assert lr.instance_count == ALL_REQ_ARGS["instance_count"]
    assert lr.instance_type == ALL_REQ_ARGS["instance_type"]
    assert lr.predictor_type == ALL_REQ_ARGS["predictor_type"]
Example #13
def test_call_fit_calculate_batch_size_2(base_fit, sagemaker_session):
    lr = LinearLearner(base_job_name="lr",
                       sagemaker_session=sagemaker_session,
                       **REQ_ARGS)

    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX),
                     num_records=10000,
                     feature_dim=FEATURE_DIM,
                     channel='train')

    lr.fit(data)

    base_fit.assert_called_once()
    assert len(base_fit.call_args[0]) == 2
    assert base_fit.call_args[0][0] == data
    assert base_fit.call_args[0][1] == DEFAULT_MINI_BATCH_SIZE
Example #14
def test_init_required_named(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS)

    assert lr.role == REQ_ARGS['role']
    assert lr.train_instance_count == REQ_ARGS['train_instance_count']
    assert lr.train_instance_type == REQ_ARGS['train_instance_type']
    assert lr.predictor_type == DEFAULT_PREDICTOR_TYPE
Example #15
def test_init_required_named(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert lr.role == ALL_REQ_ARGS['role']
    assert lr.train_instance_count == ALL_REQ_ARGS['train_instance_count']
    assert lr.train_instance_type == ALL_REQ_ARGS['train_instance_type']
    assert lr.predictor_type == ALL_REQ_ARGS['predictor_type']
Example #16
def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type,
                                   training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set = training_set[0], training_set[1].astype(
            np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )

        ll.epochs = 1
        ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #17
def test_linear_learner_multiclass(sagemaker_session):
    job_name = unique_name_from_base('linear-learner')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole',
                           1,
                           'ml.c4.2xlarge',
                           predictor_type='multiclass_classifier',
                           num_classes=10,
                           sagemaker_session=sagemaker_session)

        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #18
def test_linear_learner_multiclass(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           predictor_type='multiclass_classifier', num_classes=10, sagemaker_session=sagemaker_session)

        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):

        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #19
def test_predictor_custom_serialization(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    lr.fit(data)
    model = lr.create_model()
    custom_serializer = Mock()
    custom_deserializer = Mock()
    predictor = model.deploy(
        1,
        INSTANCE_TYPE,
        serializer=custom_serializer,
        deserializer=custom_deserializer,
    )

    assert isinstance(predictor, LinearLearnerPredictor)
    assert predictor.serializer is custom_serializer
    assert predictor.deserializer is custom_deserializer
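The Mock stand-ins above come from unittest.mock, and the assertions only verify that deploy passes them through to the returned predictor unchanged. In real code you would supply concrete (de)serializers; a sketch against the SDK v2 API, reusing model and INSTANCE_TYPE from the test above:

from unittest.mock import Mock  # source of the stand-ins used in the test

from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer

predictor = model.deploy(
    1,
    INSTANCE_TYPE,
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)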
Example #20
def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )

        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #21
def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert lr.train_image() == registry(REGION, 'linear-learner') + '/linear-learner:1'
Example #22
def test_optional_hyper_parameters_value(sagemaker_session,
                                         optional_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({optional_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
Example #23
def test_iterable_hyper_parameters_type(sagemaker_session,
                                        iterable_hyper_parameters, value):
    with pytest.raises(TypeError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({iterable_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
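Examples #22, #23, and #25 receive their (name, value) pairs through pytest parametrization, which the listing omits. A sketch of the decorator; the pairs shown here are illustrative assumptions, and the real parameter lists live in the test module:

import pytest


@pytest.mark.parametrize(
    "optional_hyper_parameters, value",
    [("epochs", 0), ("num_models", 0)],  # illustrative invalid values
)
def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    ...  # body as shown in Example #22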
Example #24
def test_num_classes_can_be_string_for_multiclass_classifier(
        sagemaker_session):
    test_params = ALL_REQ_ARGS.copy()
    test_params["predictor_type"] = "multiclass_classifier"
    test_params["num_classes"] = "3"
    LinearLearner(sagemaker_session=sagemaker_session, **test_params)
Example #25
def test_required_hyper_parameters_value(sagemaker_session,
                                         required_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params[required_hyper_parameters] = value
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)
Example #26
def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert image_uris.retrieve("linear-learner",
                               REGION) == lr.training_image_uri()
Example #27
def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert lr.train_image() == registry(REGION,
                                        'linear-learner') + '/linear-learner:1'
Example #28
def test_linear_learner(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole',
                           1,
                           'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           predictor_type='binary_classifier',
                           sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):

        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #29
def test_linear_learner(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]),
               job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #30
def test_async_linear_learner(sagemaker_session, cpu_instance_type,
                              training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set[1][:100] = 1
        training_set[1][100:200] = 0
        training_set = training_set[0], training_set[1].astype(
            np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(
            ll.record_set(training_set[0][:200], training_set[1][:200]),
            wait=False,
            job_name=job_name,
        )

        print("Waiting to re-attach to the training job: %s" % job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        estimator = LinearLearner.attach(training_job_name=job_name,
                                         sagemaker_session=sagemaker_session)
        model = LinearLearnerModel(estimator.model_data,
                                   role="SageMakerRole",
                                   sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #31
def test_all_hyperparameters(sagemaker_session):
    lr = LinearLearner(
        sagemaker_session=sagemaker_session,
        binary_classifier_model_selection_criteria="accuracy",
        target_recall=0.5,
        target_precision=0.6,
        positive_example_weight_mult=0.1,
        epochs=1,
        use_bias=True,
        num_models=5,
        num_calibration_samples=6,
        init_method="uniform",
        init_scale=0.1,
        init_sigma=0.001,
        init_bias=0,
        optimizer="sgd",
        loss="logistic",
        wd=0.4,
        l1=0.04,
        momentum=0.1,
        learning_rate=0.001,
        beta_1=0.2,
        beta_2=0.03,
        bias_lr_mult=5.5,
        bias_wd_mult=6.6,
        use_lr_scheduler=False,
        lr_scheduler_step=2,
        lr_scheduler_factor=0.03,
        lr_scheduler_minimum_lr=0.001,
        normalize_data=False,
        normalize_label=True,
        unbias_data=True,
        unbias_label=False,
        num_point_for_scaler=3,
        margin=1.0,
        quantile=0.5,
        loss_insensitivity=0.1,
        huber_delta=0.1,
        early_stopping_patience=3,
        early_stopping_tolerance=0.001,
        num_classes=1,
        accuracy_top_k=3,
        f_beta=1.0,
        balance_multiclass_weights=False,
        **ALL_REQ_ARGS,
    )

    assert lr.hyperparameters() == dict(
        predictor_type="binary_classifier",
        binary_classifier_model_selection_criteria="accuracy",
        target_recall="0.5",
        target_precision="0.6",
        positive_example_weight_mult="0.1",
        epochs="1",
        use_bias="True",
        num_models="5",
        num_calibration_samples="6",
        init_method="uniform",
        init_scale="0.1",
        init_sigma="0.001",
        init_bias="0.0",
        optimizer="sgd",
        loss="logistic",
        wd="0.4",
        l1="0.04",
        momentum="0.1",
        learning_rate="0.001",
        beta_1="0.2",
        beta_2="0.03",
        bias_lr_mult="5.5",
        bias_wd_mult="6.6",
        use_lr_scheduler="False",
        lr_scheduler_step="2",
        lr_scheduler_factor="0.03",
        lr_scheduler_minimum_lr="0.001",
        normalize_data="False",
        normalize_label="True",
        unbias_data="True",
        unbias_label="False",
        num_point_for_scaler="3",
        margin="1.0",
        quantile="0.5",
        loss_insensitivity="0.1",
        huber_delta="0.1",
        early_stopping_patience="3",
        early_stopping_tolerance="0.001",
        num_classes="1",
        accuracy_top_k="3",
        f_beta="1.0",
        balance_multiclass_weights="False",
    )
Example #32
def test_linear_learner(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           predictor_type='binary_classifier', sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):

        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
Example #33

Note: this last snippet is a pytest fixture rather than a test. It trains and deploys a binary classifier, then yields the shared model/endpoint name to dependent tests; the @pytest.fixture decorator itself is not shown in the listing.
def model_name(sagemaker_session, cpu_instance_type, training_set):
    job_name = utils.unique_name_from_base("clarify-xgb")

    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
            disable_profiler=True,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.num_models = 1
        ll.epochs = 1
        ll.num_calibration_samples = 1

        features, label = training_set
        ll.fit(
            ll.record_set(features.astype(np.float32), label.reshape(-1).astype(np.float32)),
            job_name=job_name,
        )

    with timeout.timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        ll.deploy(1, cpu_instance_type, endpoint_name=job_name, model_name=job_name, wait=True)
        yield job_name