def test_all_hyperparameters(sagemaker_session):
    lr = LinearLearner(
        sagemaker_session=sagemaker_session,
        binary_classifier_model_selection_criteria='accuracy',
        target_recall=0.5, target_precision=0.6, positive_example_weight_mult=0.1,
        epochs=1, use_bias=True, num_models=5, num_calibration_samples=6,
        init_method='uniform', init_scale=0.1, init_sigma=0.001, init_bias=0,
        optimizer='sgd', loss='logistic', wd=0.4, l1=0.04, momentum=0.1,
        learning_rate=0.001, beta_1=0.2, beta_2=0.03,
        bias_lr_mult=5.5, bias_wd_mult=6.6,
        use_lr_scheduler=False, lr_scheduler_step=2, lr_scheduler_factor=0.03,
        lr_scheduler_minimum_lr=0.001,
        normalize_data=False, normalize_label=True, unbias_data=True, unbias_label=False,
        num_point_for_scaler=3, margin=1.0, quantile=0.5,
        loss_insensitivity=0.1, huber_delta=0.1,
        early_stopping_patience=3, early_stopping_tolerance=0.001,
        num_classes=1, accuracy_top_k=3, f_beta=1.0, balance_multiclass_weights=False,
        **ALL_REQ_ARGS)

    assert lr.hyperparameters() == dict(
        predictor_type='binary_classifier',
        binary_classifier_model_selection_criteria='accuracy',
        target_recall='0.5', target_precision='0.6', positive_example_weight_mult='0.1',
        epochs='1', use_bias='True', num_models='5', num_calibration_samples='6',
        init_method='uniform', init_scale='0.1', init_sigma='0.001', init_bias='0.0',
        optimizer='sgd', loss='logistic', wd='0.4', l1='0.04', momentum='0.1',
        learning_rate='0.001', beta_1='0.2', beta_2='0.03',
        bias_lr_mult='5.5', bias_wd_mult='6.6',
        use_lr_scheduler='False', lr_scheduler_step='2', lr_scheduler_factor='0.03',
        lr_scheduler_minimum_lr='0.001',
        normalize_data='False', normalize_label='True', unbias_data='True', unbias_label='False',
        num_point_for_scaler='3', margin='1.0', quantile='0.5',
        loss_insensitivity='0.1', huber_delta='0.1',
        early_stopping_patience='3', early_stopping_tolerance='0.001',
        num_classes='1', accuracy_top_k='3', f_beta='1.0', balance_multiclass_weights='False',
    )


def test_model_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                     feature_dim=FEATURE_DIM, channel='train')
    lr.fit(data)

    model = lr.create_model()
    assert model.image == registry(REGION, 'linear-learner') + '/linear-learner:1'


def test_predictor_type(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                     feature_dim=FEATURE_DIM, channel='train')
    lr.fit(data)
    model = lr.create_model()
    predictor = model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(predictor, LinearLearnerPredictor)


def test_prepare_for_training_calculate_batch_size_1(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=1,
                     feature_dim=FEATURE_DIM, channel='train')

    lr._prepare_for_training(data)

    assert lr.mini_batch_size == 1


def test_prepare_for_training_multiple_channel(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=10000,
                     feature_dim=FEATURE_DIM, channel='train')

    lr._prepare_for_training([data, data])

    assert lr.mini_batch_size == DEFAULT_MINI_BATCH_SIZE


def test_prepare_for_training_multiple_channel_no_train(sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=10000,
                     feature_dim=FEATURE_DIM, channel='mock')

    with pytest.raises(ValueError) as ex:
        lr._prepare_for_training([data, data])

    # Check the raised exception's message after the context manager exits.
    assert 'Must provide train channel.' in str(ex.value)


def test_model_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    lr.fit(data)

    model = lr.create_model()
    assert image_uris.retrieve("linear-learner", REGION) == model.image_uri


def test_prepare_for_training_calculate_batch_size_2(sagemaker_session):
    lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=10000,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    lr._prepare_for_training(data)

    assert lr.mini_batch_size == DEFAULT_MINI_BATCH_SIZE


def test_call_fit_pass_batch_size(base_fit, sagemaker_session):
    lr = LinearLearner(base_job_name='lr', sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet('s3://{}/{}'.format(BUCKET_NAME, PREFIX), num_records=10000,
                     feature_dim=FEATURE_DIM, channel='train')

    lr.fit(data, 10)

    base_fit.assert_called_once()
    assert len(base_fit.call_args[0]) == 2
    assert base_fit.call_args[0][0] == data
    assert base_fit.call_args[0][1] == 10


def test_init_required_positional(sagemaker_session):
    lr = LinearLearner(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, PREDICTOR_TYPE,
                       sagemaker_session=sagemaker_session)

    assert lr.role == ROLE
    assert lr.train_instance_count == TRAIN_INSTANCE_COUNT
    assert lr.train_instance_type == TRAIN_INSTANCE_TYPE
    assert lr.predictor_type == PREDICTOR_TYPE


def test_num_classes_is_required_for_multiclass_classifier(sagemaker_session):
    with pytest.raises(ValueError) as excinfo:
        test_params = ALL_REQ_ARGS.copy()
        test_params["predictor_type"] = 'multiclass_classifier'
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)

    assert (
        "For predictor_type 'multiclass_classifier', 'num_classes' should be set to a value "
        "greater than 2." in str(excinfo.value)
    )


def test_init_required_named(sagemaker_session): lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS) assert lr.role == ALL_REQ_ARGS["role"] assert lr.instance_count == ALL_REQ_ARGS["instance_count"] assert lr.instance_type == ALL_REQ_ARGS["instance_type"] assert lr.predictor_type == ALL_REQ_ARGS["predictor_type"]
def test_call_fit_calculate_batch_size_2(base_fit, sagemaker_session):
    lr = LinearLearner(base_job_name="lr", sagemaker_session=sagemaker_session, **REQ_ARGS)
    data = RecordSet("s3://{}/{}".format(BUCKET_NAME, PREFIX), num_records=10000,
                     feature_dim=FEATURE_DIM, channel='train')

    lr.fit(data)

    base_fit.assert_called_once()
    assert len(base_fit.call_args[0]) == 2
    assert base_fit.call_args[0][0] == data
    assert base_fit.call_args[0][1] == DEFAULT_MINI_BATCH_SIZE


def test_init_required_named(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **REQ_ARGS)

    assert lr.role == REQ_ARGS['role']
    assert lr.train_instance_count == REQ_ARGS['train_instance_count']
    assert lr.train_instance_type == REQ_ARGS['train_instance_type']
    assert lr.predictor_type == DEFAULT_PREDICTOR_TYPE


def test_init_required_named(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    assert lr.role == ALL_REQ_ARGS['role']
    assert lr.train_instance_count == ALL_REQ_ARGS['train_instance_count']
    assert lr.train_instance_type == ALL_REQ_ARGS['train_instance_type']
    assert lr.predictor_type == ALL_REQ_ARGS['predictor_type']


def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type, training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set = training_set[0], training_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )
        ll.epochs = 1
        ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_linear_learner_multiclass(sagemaker_session):
    job_name = unique_name_from_base('linear-learner')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge',
                           predictor_type='multiclass_classifier', num_classes=10,
                           sagemaker_session=sagemaker_session)
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_linear_learner_multiclass(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           predictor_type='multiclass_classifier', num_classes=10,
                           sagemaker_session=sagemaker_session)
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_predictor_custom_serialization(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    lr.fit(data)

    model = lr.create_model()
    custom_serializer = Mock()
    custom_deserializer = Mock()
    predictor = model.deploy(
        1,
        INSTANCE_TYPE,
        serializer=custom_serializer,
        deserializer=custom_deserializer,
    )

    assert isinstance(predictor, LinearLearnerPredictor)
    assert predictor.serializer is custom_serializer
    assert predictor.deserializer is custom_deserializer


def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert lr.train_image() == registry(REGION, 'linear-learner') + '/linear-learner:1'


def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({optional_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)


def test_iterable_hyper_parameters_type(sagemaker_session, iterable_hyper_parameters, value):
    with pytest.raises(TypeError):
        test_params = ALL_REQ_ARGS.copy()
        test_params.update({iterable_hyper_parameters: value})
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)


def test_num_classes_can_be_string_for_multiclass_classifier(sagemaker_session):
    test_params = ALL_REQ_ARGS.copy()
    test_params["predictor_type"] = "multiclass_classifier"
    test_params["num_classes"] = "3"
    LinearLearner(sagemaker_session=sagemaker_session, **test_params)


def test_required_hyper_parameters_value(sagemaker_session, required_hyper_parameters, value):
    with pytest.raises(ValueError):
        test_params = ALL_REQ_ARGS.copy()
        test_params[required_hyper_parameters] = value
        LinearLearner(sagemaker_session=sagemaker_session, **test_params)


def test_image(sagemaker_session):
    lr = LinearLearner(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    assert image_uris.retrieve("linear-learner", REGION) == lr.training_image_uri()


def test_linear_learner(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           predictor_type='binary_classifier',
                           sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_linear_learner(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_async_linear_learner(sagemaker_session, cpu_instance_type, training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set[1][:100] = 1
        training_set[1][100:200] = 0
        training_set = training_set[0], training_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3

        # Start training without blocking; the job is re-attached below.
        ll.fit(
            ll.record_set(training_set[0][:200], training_set[1][:200]),
            wait=False,
            job_name=job_name,
        )

        print("Waiting to re-attach to the training job: %s" % job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        estimator = LinearLearner.attach(training_job_name=job_name,
                                         sagemaker_session=sagemaker_session)
        model = LinearLearnerModel(estimator.model_data, role="SageMakerRole",
                                   sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None


def test_all_hyperparameters(sagemaker_session): lr = LinearLearner( sagemaker_session=sagemaker_session, binary_classifier_model_selection_criteria="accuracy", target_recall=0.5, target_precision=0.6, positive_example_weight_mult=0.1, epochs=1, use_bias=True, num_models=5, num_calibration_samples=6, init_method="uniform", init_scale=0.1, init_sigma=0.001, init_bias=0, optimizer="sgd", loss="logistic", wd=0.4, l1=0.04, momentum=0.1, learning_rate=0.001, beta_1=0.2, beta_2=0.03, bias_lr_mult=5.5, bias_wd_mult=6.6, use_lr_scheduler=False, lr_scheduler_step=2, lr_scheduler_factor=0.03, lr_scheduler_minimum_lr=0.001, normalize_data=False, normalize_label=True, unbias_data=True, unbias_label=False, num_point_for_scaler=3, margin=1.0, quantile=0.5, loss_insensitivity=0.1, huber_delta=0.1, early_stopping_patience=3, early_stopping_tolerance=0.001, num_classes=1, accuracy_top_k=3, f_beta=1.0, balance_multiclass_weights=False, **ALL_REQ_ARGS, ) assert lr.hyperparameters() == dict( predictor_type="binary_classifier", binary_classifier_model_selection_criteria="accuracy", target_recall="0.5", target_precision="0.6", positive_example_weight_mult="0.1", epochs="1", use_bias="True", num_models="5", num_calibration_samples="6", init_method="uniform", init_scale="0.1", init_sigma="0.001", init_bias="0.0", optimizer="sgd", loss="logistic", wd="0.4", l1="0.04", momentum="0.1", learning_rate="0.001", beta_1="0.2", beta_2="0.03", bias_lr_mult="5.5", bias_wd_mult="6.6", use_lr_scheduler="False", lr_scheduler_step="2", lr_scheduler_factor="0.03", lr_scheduler_minimum_lr="0.001", normalize_data="False", normalize_label="True", unbias_data="True", unbias_label="False", num_point_for_scaler="3", margin="1.0", quantile="0.5", loss_insensitivity="0.1", huber_delta="0.1", early_stopping_patience="3", early_stopping_tolerance="0.001", num_classes="1", accuracy_top_k="3", f_beta="1.0", balance_multiclass_weights="False", )
def model_name(sagemaker_session, cpu_instance_type, training_set):
    # Fixture-style helper: trains a small binary classifier, deploys it, and
    # yields the shared model/endpoint name to dependent tests.
    job_name = utils.unique_name_from_base("clarify-xgb")

    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
            disable_profiler=True,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.num_models = 1
        ll.epochs = 1
        ll.num_calibration_samples = 1

        features, label = training_set
        ll.fit(
            ll.record_set(features.astype(np.float32), label.reshape(-1).astype(np.float32)),
            job_name=job_name,
        )

    with timeout.timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        ll.deploy(1, cpu_instance_type, endpoint_name=job_name, model_name=job_name, wait=True)
        yield job_name
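

# ---------------------------------------------------------------------------
# The unit tests above reference module-level constants (ALL_REQ_ARGS, REQ_ARGS,
# ROLE, BUCKET_NAME, PREFIX, FEATURE_DIM, REGION, DEFAULT_MINI_BATCH_SIZE, ...)
# and a mocked `sagemaker_session` pytest fixture that this excerpt does not
# include. The sketch below is a minimal, illustrative stand-in for that
# scaffolding: the names are taken from the tests, but the concrete values and
# the fixture wiring are assumptions, not the SDK's actual test configuration.
# Note that older snippets pass train_instance_count/train_instance_type while
# newer ones pass instance_count/instance_type; pick the keys matching the SDK
# version under test.
from unittest.mock import Mock

import pytest

ROLE = "myrole"                        # assumed IAM role name
TRAIN_INSTANCE_COUNT = 1               # assumed instance count
TRAIN_INSTANCE_TYPE = "ml.c4.xlarge"   # assumed training instance type
PREDICTOR_TYPE = "binary_classifier"
BUCKET_NAME = "mybucket"               # assumed S3 bucket for RecordSet inputs
PREFIX = "prefix"
FEATURE_DIM = 10
REGION = "us-west-2"
DEFAULT_MINI_BATCH_SIZE = 1000         # assumed default mini-batch size

# Required constructor arguments shared by most unit tests (newer-style keys).
ALL_REQ_ARGS = dict(
    role=ROLE,
    instance_count=TRAIN_INSTANCE_COUNT,
    instance_type=TRAIN_INSTANCE_TYPE,
    predictor_type=PREDICTOR_TYPE,
)


@pytest.fixture()
def sagemaker_session():
    # A Mock stands in for sagemaker.session.Session so no AWS calls are made.
    session = Mock(boto_region_name=REGION)
    session.default_bucket.return_value = BUCKET_NAME
    return session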