def model_name(sagemaker_session, cpu_instance_type, training_set):
    # Trains and deploys a binary-classifier LinearLearner, then yields the
    # shared name used for the training job, model, and endpoint.
    job_name = utils.unique_name_from_base("clarify-xgb")

    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
            disable_profiler=True,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.num_models = 1
        ll.epochs = 1
        ll.num_calibration_samples = 1

        features, label = training_set
        ll.fit(
            ll.record_set(features.astype(np.float32), label.reshape(-1).astype(np.float32)),
            job_name=job_name,
        )

    with timeout.timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        ll.deploy(1, cpu_instance_type, endpoint_name=job_name, model_name=job_name, wait=True)
        yield job_name
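# The generator above yields instead of returning, so it is presumably registered
# as a pytest fixture, e.g. decorated with @pytest.fixture(scope="module"):
# pytest runs the body up to `yield`, hands `job_name` to dependent tests, and
# resumes it afterwards so the endpoint-deleting context manager unwinds.
# A minimal sketch of a consuming test; the test itself is hypothetical, not
# part of the original suite, though describe_endpoint is the standard boto3
# SageMaker client call.
def test_endpoint_in_service(model_name, sagemaker_session):
    # model_name is the shared training-job/model/endpoint name yielded above.
    description = sagemaker_session.sagemaker_client.describe_endpoint(EndpointName=model_name)
    assert description["EndpointStatus"] == "InService"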
def test_linear_learner_multiclass(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           predictor_type='multiclass_classifier', num_classes=10,
                           sagemaker_session=sagemaker_session)
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_linear_learner_multiclass(sagemaker_session):
    job_name = unique_name_from_base('linear-learner')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge',
                           predictor_type='multiclass_classifier', num_classes=10,
                           sagemaker_session=sagemaker_session)
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type, training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set = training_set[0], training_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )
        ll.epochs = 1
        ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_linear_learner():
    with timeout(minutes=15):
        sagemaker_session = sagemaker.Session(boto_session=boto3.Session(region_name=REGION))
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # Relabel the first 200 examples into two binary classes
        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.predictor_type = 'binary_classifier'
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    with timeout(minutes=15):
        model = LinearLearnerModel(ll.model_data, role='SageMakerRole',
                                   sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge')

        try:
            result = predictor.predict(train_set[0][0:100])
            assert len(result) == 100
            for record in result:
                assert record.label["predicted_label"] is not None
                assert record.label["score"] is not None
        finally:
            sagemaker_session.delete_endpoint(predictor.endpoint)
def test_linear_learner(sagemaker_session, cpu_instance_type, training_set):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        training_set[1][:100] = 1
        training_set[1][100:200] = 0
        training_set = training_set[0], training_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="multiclass_classifier",
            num_classes=10,
            sagemaker_session=sagemaker_session,
        )
        ll.epochs = 1
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_async_linear_learner(sagemaker_session):
    training_job_name = ""
    endpoint_name = 'test-linear-learner-async-{}'.format(sagemaker_timestamp())

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           predictor_type='binary_classifier', sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]), wait=False)
        training_job_name = ll.latest_training_job.name

    print("Waiting to re-attach to the training job: %s" % training_job_name)
    time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = LinearLearner.attach(training_job_name=training_job_name,
                                         sagemaker_session=sagemaker_session)
        model = LinearLearnerModel(estimator.model_data, role='SageMakerRole',
                                   sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_async_linear_learner(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = "accuracy"
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = "uniform"
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = "adam"
        ll.loss = "logistic"
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(
            ll.record_set(train_set[0][:200], train_set[1][:200]), wait=False, job_name=job_name
        )

    print("Waiting to re-attach to the training job: %s" % job_name)
    time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        estimator = LinearLearner.attach(
            training_job_name=job_name, sagemaker_session=sagemaker_session
        )
        model = LinearLearnerModel(
            estimator.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
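# Both async variants above exercise the same detach/re-attach workflow:
# fit(wait=False) starts the training job and returns immediately, and
# LinearLearner.attach() later rebuilds an estimator bound to that job,
# streaming its logs and blocking until it completes if it is still running.
# A minimal sketch of the pattern on its own; `ll` and `records` are as
# constructed in the tests above, and the job name is a placeholder.
ll.fit(records, wait=False, job_name="my-linear-job")

# Later, possibly from another process: re-attach by job name alone.
estimator = LinearLearner.attach(training_job_name="my-linear-job")
print(estimator.model_data)  # S3 URI of the trained model artifact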
def test_linear_learner(sagemaker_session):
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge', base_job_name='test-linear-learner',
                           predictor_type='binary_classifier', sagemaker_session=sagemaker_session)
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        ll.num_point_for_scaler = 10000
        ll.margin = 1.0
        ll.quantile = 0.5
        ll.loss_insensitivity = 0.1
        ll.huber_delta = 0.1
        ll.early_stopping_tolerance = 0.0001
        ll.early_stopping_patience = 3
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
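# The fixture-based variants above take a shared `training_set` parameter
# instead of loading MNIST inline. That fixture is defined outside this
# section; a minimal sketch, assuming it simply mirrors the inline loading
# code from the older variants (the fixture name matches the tests, but the
# module scope is an assumption), with DATA_DIR imported as in the tests.
import gzip
import os
import pickle
import sys

import pytest


@pytest.fixture(scope="module")
def training_set():
    # Features and labels of the pickled one_p_mnist training split.
    data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
    pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}
    with gzip.open(data_path, "rb") as f:
        train_set, _, _ = pickle.load(f, **pickle_args)
    return train_set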