def test_async_fit_deploy(sagemaker_session, pytorch_full_version):
    """Start a PyTorch training job without waiting, then re-attach by name and deploy.

    The deployed endpoint is queried with a random float32 batch of shape
    (100, 1, 28, 28) and the prediction is expected to have shape (100, 10).
    """
    # TODO: add tests against local mode when it's ready to be used
    instance_type = 'ml.p2.xlarge'
    training_job_name = ""

    with timeout(minutes=10):
        pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version, instance_type)
        channels = {'training': _upload_training_data(pytorch)}
        pytorch.fit(channels, wait=False)
        training_job_name = pytorch.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    if _is_local_mode(instance_type):
        # The attach/deploy half of the test is skipped for local-mode instance types.
        return

    endpoint_name = 'test-pytorch-async-fit-attach-deploy-{}'.format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % training_job_name)
        attached = PyTorch.attach(training_job_name=training_job_name,
                                  sagemaker_session=sagemaker_session)
        predictor = attached.deploy(1, instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        batch = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        prediction = predictor.predict(batch)

        assert prediction.shape == (batch_size, 10)
def test_factorization_machines(sagemaker_session):
    """Train a FactorizationMachines regressor, deploy it and score ten samples.

    Training uses the first 200 samples of the pickled ``one_p_mnist`` training
    split; every returned record must carry a non-None ``score`` label.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')

        # 'encoding' is only passed to pickle.load when not running on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)
        features, labels = train_set[0], train_set[1]

        fm = FactorizationMachines(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_factors=10,
            predictor_type='regressor',
            epochs=2,
            clip_gradient=1e2,
            eps=0.001,
            rescale_grad=1.0 / 100,
            sagemaker_session=sagemaker_session,
            base_job_name='test-fm',
        )

        # training labels must be 'float32'
        training_records = fm.record_set(features[:200], labels[:200].astype('float32'))
        fm.fit(training_records)

    endpoint_name = name_from_base('fm')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = FactorizationMachinesModel(fm.model_data, role='SageMakerRole',
                                           sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        predictions = predictor.predict(features[:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_ntm(sagemaker_session):
    """Train an NTM model from local record files, deploy it and run one prediction.

    The single returned record must carry a non-None ``topic_weights`` label.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'ntm')
        records_file = os.path.join(data_path, 'nips-train_1.pbr')

        with open(records_file, 'rb') as stream:
            all_records = read_records(stream)

        # all records must be same
        first_record = all_records[0]
        feature_num = int(first_record.features['values'].float32_tensor.shape[0])

        ntm = NTM(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            num_topics=10,
            sagemaker_session=sagemaker_session,
            base_job_name='test-ntm',
        )

        record_set = prepare_record_set_from_local_files(
            data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session)
        ntm.fit(record_set, None)

    endpoint_name = name_from_base('ntm')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = NTMModel(ntm.model_data, role='SageMakerRole',
                         sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        predictions = predictor.predict(np.random.rand(1, feature_num))

        assert len(predictions) == 1
        for prediction in predictions:
            assert prediction.label["topic_weights"] is not None
def test_attach_deploy(chainer_training_job, sagemaker_session):
    """Attach to an existing Chainer training job, deploy it and check predictions."""
    timestamp = sagemaker_timestamp()
    endpoint_name = 'test-chainer-attach-deploy-{}'.format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = Chainer.attach(chainer_training_job, sagemaker_session=sagemaker_session)
        _predict_and_assert(attached.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name))
def test_knn_regressor(sagemaker_session):
    """Train a KNN regressor, deploy it and score ten samples.

    Training uses the first 200 samples of the pickled ``one_p_mnist`` training
    split; every returned record must carry a non-None ``score`` label.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # 'encoding' is only passed to pickle.load when not running on Python 2.
        pickle_args = {'encoding': 'latin1'} if sys.version_info.major != 2 else {}

        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)
        samples = train_set[0]
        targets = train_set[1]

        knn = KNN(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            k=10,
            predictor_type='regressor',
            sample_size=500,
            sagemaker_session=sagemaker_session,
            base_job_name='test-knn-rr',
        )

        # training labels must be 'float32'
        training_records = knn.record_set(samples[:200], targets[:200].astype('float32'))
        knn.fit(training_records)

    endpoint_name = name_from_base('knn')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = KNNModel(knn.model_data, role='SageMakerRole',
                         sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        predictions = predictor.predict(samples[:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["score"] is not None
def test_tf(sagemaker_session, tf_full_version):
    """Train the iris TensorFlow script, deploy it and compare two request formats.

    The same feature vector is sent once wrapped in an ``{'inputs': ...}`` dict
    and once as a bare list; both predictions must be equal.
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(
            entry_point=script_path,
            role='SageMakerRole',
            framework_version=tf_full_version,
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-tf',
        )

        inputs = sagemaker_session.upload_data(path=DATA_PATH,
                                               key_prefix='integ-test-data/tf_iris')
        estimator.fit(inputs)
        print('job succeeded: {}'.format(estimator.latest_training_job.name))

    # The endpoint reuses the training job name.
    endpoint_name = estimator.latest_training_job.name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        json_predictor = estimator.deploy(initial_instance_count=1,
                                          instance_type='ml.c4.xlarge',
                                          endpoint_name=endpoint_name)

        features = [6.4, 3.2, 4.5, 1.5]
        responses = []
        for payload in ({'inputs': features}, features):
            response = json_predictor.predict(payload)
            print('predict result: {}'.format(response))
            responses.append(response)

        dict_result, list_result = responses
        assert dict_result == list_result
def test_pca(sagemaker_session):
    """Train a PCA model, deploy it and project five samples.

    Training uses the first 100 samples of the pickled ``one_p_mnist`` training
    split; every returned record must carry a non-None ``projection`` label.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')

        # 'encoding' is only passed to pickle.load when not running on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)
        samples = train_set[0]

        pca = sagemaker.amazon.pca.PCA(
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            num_components=48,
            sagemaker_session=sagemaker_session,
            base_job_name='test-pca',
        )

        # Hyperparameters assigned after construction.
        pca.algorithm_mode = 'randomized'
        pca.subtract_mean = True
        pca.extra_components = 5

        training_records = pca.record_set(samples[:100])
        pca.fit(training_records)

    endpoint_name = name_from_base('pca')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        pca_model = sagemaker.amazon.pca.PCAModel(model_data=pca.model_data,
                                                  role='SageMakerRole',
                                                  sagemaker_session=sagemaker_session)
        predictor = pca_model.deploy(initial_instance_count=1,
                                     instance_type="ml.c4.xlarge",
                                     endpoint_name=endpoint_name)

        projections = predictor.predict(samples[:5])

        assert len(projections) == 5
        for projected in projections:
            assert projected.label["projection"] is not None
def test_cifar(sagemaker_session, tf_full_version):
    """Train the CIFAR-10 ResNet script on two instances, deploy it and predict once.

    A random (32, 32, 3) array is sent pickled to the endpoint and the response
    is expected to contain ten probability values.
    """
    with timeout(minutes=45):
        script_path = os.path.join(DATA_DIR, 'cifar_10', 'source')
        dataset_path = os.path.join(DATA_DIR, 'cifar_10', 'data')

        estimator = TensorFlow(entry_point='resnet_cifar_10.py',
                               source_dir=script_path,
                               role='SageMakerRole',
                               framework_version=tf_full_version,
                               training_steps=500,
                               evaluation_steps=5,
                               train_instance_count=2,
                               train_instance_type='ml.p2.xlarge',
                               sagemaker_session=sagemaker_session,
                               train_max_run=45 * 60,
                               base_job_name='test-cifar')

        inputs = estimator.sagemaker_session.upload_data(path=dataset_path,
                                                         key_prefix='data/cifar10')
        estimator.fit(inputs, logs=False)
        print('job succeeded: {}'.format(estimator.latest_training_job.name))

    endpoint_name = estimator.latest_training_job.name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Pass the endpoint name explicitly so the endpoint that gets created is
        # the one the surrounding context manager is keyed on, instead of
        # relying on deploy()'s default naming.
        predictor = estimator.deploy(initial_instance_count=1,
                                     instance_type='ml.p2.xlarge',
                                     endpoint_name=endpoint_name)
        predictor.serializer = PickleSerializer()
        predictor.content_type = PICKLE_CONTENT_TYPE

        data = np.random.randn(32, 32, 3)
        predict_response = predictor.predict(data)
        assert len(predict_response['outputs']['probabilities']['floatVal']) == 10
def test_tf_async(sagemaker_session):
    """Start the iris TensorFlow job without waiting, re-attach by name and deploy.

    The endpoint is named after the training job; one prediction is requested
    and printed (no assertion is made on its value).
    """
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(
            entry_point=script_path,
            role='SageMakerRole',
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-tf',
        )

        inputs = estimator.sagemaker_session.upload_data(path=DATA_PATH,
                                                         key_prefix='integ-test-data/tf_iris')
        estimator.fit(inputs, wait=False)
        training_job_name = estimator.latest_training_job.name
        time.sleep(20)

    endpoint_name = training_job_name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name,
                                      sagemaker_session=sagemaker_session)
        json_predictor = estimator.deploy(initial_instance_count=1,
                                          instance_type='ml.c4.xlarge',
                                          endpoint_name=endpoint_name)

        features = [6.4, 3.2, 4.5, 1.5]
        prediction = json_predictor.predict(features)
        print('predict result: {}'.format(prediction))
def test_async_fit(sagemaker_session):
    """Start an MXNet MNIST job without waiting, re-attach by name and deploy.

    A zero-filled array of shape (1, 1, 28, 28) is sent to the endpoint; the
    test only requires the prediction call to succeed.
    """
    endpoint_name = 'test-mxnet-attach-deploy-{}'.format(sagemaker_timestamp())

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')

        mx = MXNet(entry_point=script_path,
                   role='SageMakerRole',
                   train_instance_count=1,
                   train_instance_type='ml.c4.xlarge',
                   sagemaker_session=sagemaker_session)

        train_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test')

        channels = {'train': train_input, 'test': test_input}
        mx.fit(channels, wait=False)
        training_job_name = mx.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % training_job_name)
        attached = MXNet.attach(training_job_name=training_job_name,
                                sagemaker_session=sagemaker_session)
        predictor = attached.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
def test_lda(sagemaker_session):
    """Train an LDA model from local record files, deploy it and run one prediction.

    The single returned record must carry a non-None ``topic_mixture`` label.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'lda')
        records_file = os.path.join(data_path, 'nips-train_1.pbr')

        with open(records_file, 'rb') as stream:
            all_records = read_records(stream)

        # all records must be same
        first_record = all_records[0]
        feature_num = int(first_record.features['values'].float32_tensor.shape[0])

        lda = LDA(
            role='SageMakerRole',
            train_instance_type='ml.c4.xlarge',
            num_topics=10,
            sagemaker_session=sagemaker_session,
            base_job_name='test-lda',
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num, sagemaker_session)
        lda.fit(record_set, 100)

    endpoint_name = name_from_base('lda')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = LDAModel(lda.model_data, role='SageMakerRole',
                         sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        predictions = predictor.predict(np.random.rand(1, feature_num))

        assert len(predictions) == 1
        for prediction in predictions:
            assert prediction.label["topic_mixture"] is not None
def test_linear_learner_multiclass(sagemaker_session):
    """Train a 10-class LinearLearner for one epoch, deploy it and score 100 samples.

    Every returned record must carry non-None ``predicted_label`` and ``score``
    labels.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # 'encoding' is only passed to pickle.load when not running on Python 2.
        pickle_args = {'encoding': 'latin1'} if sys.version_info.major != 2 else {}

        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)

        # Labels are cast to float32 before building the record set.
        features = train_set[0]
        labels = train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner('SageMakerRole', 1, 'ml.c4.2xlarge',
                           base_job_name='test-linear-learner',
                           predictor_type='multiclass_classifier',
                           num_classes=10,
                           sagemaker_session=sagemaker_session)
        ll.epochs = 1

        training_records = ll.record_set(features[:200], labels[:200])
        ll.fit(training_records)

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        predictions = predictor.predict(features[0:100])

        assert len(predictions) == 100
        for prediction in predictions:
            assert prediction.label["predicted_label"] is not None
            assert prediction.label["score"] is not None
def test_tuning_mxnet(sagemaker_session):
    """Tune the MXNet MNIST script over ``learning_rate`` and deploy the best job.

    Four training jobs are run (two in parallel) against the
    ``Validation-accuracy`` metric; the deployed endpoint is then queried with a
    zero-filled (1, 1, 28, 28) array.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'tuning.py')

        estimator = MXNet(
            entry_point=script_path,
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.m4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='tune-mxnet',
        )

        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.01, 0.2)}
        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [
            {'Name': 'Validation-accuracy', 'Regex': 'Validation-accuracy=([0-9\\.]+)'},
        ]
        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                    metric_definitions, max_jobs=4, max_parallel_jobs=2)

        uploader = estimator.sagemaker_session
        train_input = uploader.upload_data(path=os.path.join(data_path, 'train'),
                                           key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = uploader.upload_data(path=os.path.join(data_path, 'test'),
                                          key_prefix='integ-test-data/mxnet_mnist/test')

        tuner.fit({'train': train_input, 'test': test_input})
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predictor.predict(np.zeros(shape=(1, 1, 28, 28)))
def test_attach_deploy(mxnet_training_job, sagemaker_session):
    """Attach to an existing MXNet training job, deploy it and request one prediction."""
    timestamp = sagemaker_timestamp()
    endpoint_name = 'test-mxnet-attach-deploy-{}'.format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session)
        predictor = attached.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        # Only the success of the call is checked, not the returned value.
        predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
def test_deploy_model(chainer_training_job, sagemaker_session):
    """Build a ChainerModel from a finished job's artifacts, deploy it and check predictions."""
    endpoint_name = 'test-chainer-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Look up the S3 location of the model artifacts produced by the training job.
        client = sagemaker_session.sagemaker_client
        job_description = client.describe_training_job(TrainingJobName=chainer_training_job)
        model_data = job_description['ModelArtifacts']['S3ModelArtifacts']

        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        model = ChainerModel(model_data, 'SageMakerRole',
                             entry_point=script_path,
                             sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name)
        _predict_and_assert(predictor)
def test_async_kmeans(sagemaker_session):
    """Start KMeans training without waiting, re-attach by job name and deploy.

    Before training, the hyperparameters reported by the estimator are compared
    with the values assigned on it. After deployment, ten samples are scored and
    each record must carry ``closest_cluster`` and ``distance_to_cluster`` labels.
    """
    training_job_name = ""
    endpoint_name = name_from_base('kmeans')

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')

        # 'encoding' is only passed to pickle.load when not running on Python 2.
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **pickle_args)

        kmeans = KMeans(role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10,
                        sagemaker_session=sagemaker_session,
                        base_job_name='test-kmeans')

        kmeans.init_method = 'random'
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1
        kmeans.center_factor = 1

        # Expected mapping from estimator attributes to hyperparameter names.
        expected_hyperparameters = {
            'init_method': kmeans.init_method,
            'local_lloyd_max_iter': str(kmeans.max_iterations),
            'local_lloyd_tol': str(kmeans.tol),
            'local_lloyd_num_trials': str(kmeans.num_trials),
            'local_lloyd_init_method': kmeans.local_init_method,
            'half_life_time_size': str(kmeans.half_life_time_size),
            'epochs': str(kmeans.epochs),
            'extra_center_factor': str(kmeans.center_factor),
            'k': str(kmeans.k),
            'force_dense': 'True',
        }
        assert kmeans.hyperparameters() == expected_hyperparameters

        training_records = kmeans.record_set(train_set[0][:100])
        kmeans.fit(training_records, wait=False)
        training_job_name = kmeans.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KMeans.attach(training_job_name=training_job_name,
                                 sagemaker_session=sagemaker_session)
        model = KMeansModel(attached.model_data, role='SageMakerRole',
                            sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)
        predictions = predictor.predict(train_set[0][:10])

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction.label["closest_cluster"] is not None
            assert prediction.label["distance_to_cluster"] is not None
def test_async_byo_estimator(sagemaker_session, region):
    """Train the factorization-machines image through the generic Estimator, asynchronously.

    The first 100 training samples are serialized into an in-memory buffer and
    uploaded to the session's default bucket. Training is started with
    ``wait=False``; the job is then re-attached by name, deployed, and queried
    with ten samples through a JSON serializer/deserializer pair. Each
    prediction must carry a non-None ``score`` and the attached estimator must
    report the original training image.
    """
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        # Labels equal to 0 become 1.0, everything else becomes 0.0.
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0,
                          1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        # S3 keys always use '/' as separator. Build the key once, independent
        # of os.sep, so the uploaded object and the URI passed to fit() refer
        # to the same location on every platform.
        s3_key = '{}/train/{}'.format(prefix, key)
        boto3.resource('s3').Bucket(bucket).Object(s3_key).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}'.format(bucket, s3_key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
def test_byo_estimator(sagemaker_session, region):
    """Use the Factorization Machines algorithm as an example of a bring-your-own image.

    The training data under ``DATA_DIR/dummy_tensor`` is uploaded to S3. An
    Estimator is then created for the algorithm image and its hyperparameters
    are set as required by the algorithm. ``fit()`` is called with the S3
    location, the trained model is deployed, and predictions are requested from
    the endpoint after the default predictor has been switched to a JSON
    serializer and deserializer.
    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # The pickled training split is only used as prediction input below.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        # S3 key prefixes always use '/' as separator, so build it without
        # os.path.join (which would use the local OS separator).
        key_prefix = '{}/train/{}'.format(prefix, key)
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=key_prefix)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
def test_deploy_model(mxnet_training_job, sagemaker_session):
    """Build an MXNetModel from a finished job's artifacts, deploy it and predict once."""
    endpoint_name = 'test-mxnet-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Look up the S3 location of the model artifacts produced by the training job.
        client = sagemaker_session.sagemaker_client
        job_description = client.describe_training_job(TrainingJobName=mxnet_training_job)
        model_data = job_description['ModelArtifacts']['S3ModelArtifacts']

        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
        model = MXNetModel(model_data, 'SageMakerRole',
                           entry_point=script_path,
                           sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        # Only the success of the call is checked, not the returned value.
        predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
def test_tuning_kmeans(sagemaker_session):
    """Tune a KMeans estimator, deploy the best training job and score ten samples.

    Two tuning jobs are run in parallel, minimizing the ``test:msd`` metric over
    four hyperparameter ranges. Every record returned by the endpoint must carry
    non-None ``closest_cluster`` and ``distance_to_cluster`` labels.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10,
                        sagemaker_session=sagemaker_session,
                        base_job_name='tk',
                        output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

        # set kmeans specific hp
        kmeans.init_method = 'random'
        # Fixed: this was previously assigned to the misspelled attribute
        # 'max_iterators', which the rest of the test never reads.
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hp you want to optimize over
        hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                                 'mini_batch_size': IntegerParameter(10, 100),
                                 'epochs': IntegerParameter(1, 2),
                                 'init_method': CategoricalParameter(['kmeans++', 'random'])}
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(estimator=kmeans,
                                    objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges,
                                    objective_type='Minimize',
                                    max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
def test_sync_fit_deploy(pytorch_training_job, sagemaker_session):
    """Attach to a finished PyTorch training job, deploy it and predict twice.

    The first request sends a single zero-filled sample; the second sends a
    random batch of 100 samples, whose output must have shape (100, 10).
    """
    # TODO: add tests against local mode when it's ready to be used
    endpoint_name = 'test-pytorch-sync-fit-attach-deploy{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = PyTorch.attach(pytorch_training_job, sagemaker_session=sagemaker_session)
        predictor = attached.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        single_sample = numpy.zeros(shape=(1, 1, 28, 28), dtype=numpy.float32)
        predictor.predict(single_sample)

        batch_size = 100
        batch = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        prediction = predictor.predict(batch)

        assert prediction.shape == (batch_size, 10)
def test_deploy_model(pytorch_training_job, sagemaker_session):
    """Build a PyTorchModel from a finished job's artifacts, deploy it and check output shape.

    A random float32 batch of shape (100, 1, 28, 28) is sent to the endpoint and
    the prediction is expected to have shape (100, 10).
    """
    endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Look up the S3 location of the model artifacts produced by the training job.
        client = sagemaker_session.sagemaker_client
        job_description = client.describe_training_job(TrainingJobName=pytorch_training_job)
        model_data = job_description['ModelArtifacts']['S3ModelArtifacts']

        model = PyTorchModel(model_data, 'SageMakerRole',
                             entry_point=MNIST_SCRIPT,
                             sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        batch_size = 100
        batch = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        prediction = predictor.predict(batch)

        assert prediction.shape == (batch_size, 10)
def test_tuning_chainer(sagemaker_session):
    """Tune the Chainer MNIST script over ``alpha`` and deploy the best training job.

    Two tuning jobs are run in parallel against the ``Validation-accuracy``
    metric. The endpoint is then queried with zero-filled float32 batches of
    three different shapes, and each response must contain one entry per sample.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        # Raw string: the pattern contains regex escapes (\[, \s, \d, \.) that are
        # not valid Python string escapes. The resulting value is unchanged.
        metric_definitions = [
            {'Name': 'Validation-accuracy',
             'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                    metric_definitions, max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
def test_async_fit(sagemaker_session):
    """Start a Chainer MNIST job without waiting, re-attach by name and deploy."""
    timestamp = sagemaker_timestamp()
    endpoint_name = 'test-chainer-attach-deploy-{}'.format(timestamp)

    with timeout(minutes=5):
        training_job_name = _run_mnist_training_job(
            sagemaker_session, "ml.c4.xlarge", 1,
            chainer_full_version=CHAINER_VERSION, wait=False)

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % training_job_name)
        attached = Chainer.attach(training_job_name=training_job_name,
                                  sagemaker_session=sagemaker_session)
        _predict_and_assert(attached.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name))
def test_async_byo_estimator(sagemaker_session, region):
    """Train the factorization-machines image through the generic Estimator, asynchronously.

    The training data under ``DATA_DIR/dummy_tensor`` is uploaded to S3 and
    training is started with ``wait=False``. The job is then re-attached by
    name, deployed, and queried with ten samples through a JSON
    serializer/deserializer pair. Each prediction must carry a non-None
    ``score`` and the attached estimator must report the original training image.
    """
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # The pickled training split is only used as prediction input below.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        # S3 key prefixes always use '/' as separator, so build it without
        # os.path.join (which would use the local OS separator).
        key_prefix = '{}/train/{}'.format(prefix, key)
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=key_prefix)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
def test_tuning_lda(sagemaker_session):
    """Tune an LDA estimator, deploy the best training job and run one prediction.

    Two tuning jobs are run in parallel, maximizing the ``test:pwll`` metric over
    ``alpha0`` and ``num_topics``. The single returned record must carry a
    non-None ``topic_mixture`` label.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        records_file = os.path.join(data_path, 'nips-train_1.pbr')

        with open(records_file, 'rb') as stream:
            all_records = read_records(stream)

        # all records must be same
        first_record = all_records[0]
        feature_num = int(first_record.features['values'].float32_tensor.shape[0])

        lda = LDA(
            role='SageMakerRole',
            train_instance_type='ml.c4.xlarge',
            num_topics=10,
            sagemaker_session=sagemaker_session,
            base_job_name='test-lda',
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num, sagemaker_session)
        # A second record set is prepared from the same files and assigned to the 'test' channel.
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num, sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'alpha0': ContinuousParameter(1, 10),
            'num_topics': IntegerParameter(1, 2),
        }
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuner.fit([record_set, test_record_set], mini_batch_size=1)
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predictions = predictor.predict(np.random.rand(1, feature_num))

        assert len(predictions) == 1
        for prediction in predictions:
            assert prediction.label['topic_mixture'] is not None
def test_tuning_tf(sagemaker_session):
    """Tune the iris TensorFlow script over ``learning_rate`` and deploy the best job.

    Two tuning jobs are run in parallel, minimizing the ``loss`` metric. The
    same feature vector is then sent once wrapped in an ``{'inputs': ...}`` dict
    and once as a bare list; both predictions must be equal.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(
            entry_point=script_path,
            role='SageMakerRole',
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='tune-tf',
        )

        inputs = sagemaker_session.upload_data(path=DATA_PATH,
                                               key_prefix='integ-test-data/tf_iris')

        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.05, 0.2)}
        objective_metric_name = 'loss'
        metric_definitions = [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                    metric_definitions, objective_type='Minimize',
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit(inputs)
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        features = [6.4, 3.2, 4.5, 1.5]
        responses = []
        for payload in ({'inputs': features}, features):
            response = predictor.predict(payload)
            print('predict result: {}'.format(response))
            responses.append(response)

        dict_result, list_result = responses
        assert dict_result == list_result
def test_attach_tuning_pytorch(sagemaker_session):
    """Tune a PyTorch MNIST estimator, re-attach to the tuning job and deploy.

    The tuning job sweeps ``batch-size`` with ``evaluation-accuracy`` as the
    objective. Deployment goes through a tuner obtained from
    ``HyperparameterTuner.attach``; the endpoint is queried with a single
    sample and then with a batch of 100, whose output must have shape
    ``(100, 10)``.
    """
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script,
                        role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        # Raw string: "\d" inside a plain literal is an invalid escape
        # sequence. The resulting pattern is unchanged.
        metric_definitions = [{'Name': 'evaluation-accuracy',
                               'Regex': r'Overall test accuracy: (\d+)'}]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                    metric_definitions, max_jobs=2, max_parallel_jobs=2)

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'training'),
            key_prefix='integ-test-data/pytorch_mnist/training')
        tuner.fit({'training': training_data})

        tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name:' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(tuning_job_name,
                                                sagemaker_session=sagemaker_session)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')

        # Single-sample request first; its result is not inspected.
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def test_async_knn_classifier(sagemaker_session):
    """Start KNN training without waiting, re-attach to the job and deploy.

    Training is launched with ``wait=False``; after a 20 second pause the job
    is re-attached by name, a ``KNNModel`` is built from its model data and
    deployed, and ten predictions are checked for a ``score`` label.
    """
    training_job_name = ""
    endpoint_name = name_from_base("knn")

    with timeout(minutes=5):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as handle:
            train_set, _, _ = pickle.load(handle, **pickle_args)

        knn = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            k=10,
            predictor_type="classifier",
            sample_size=500,
            index_type="faiss.IVFFlat",
            index_metric="L2",
            sagemaker_session=sagemaker_session,
            base_job_name="test-knn-cl",
        )

        # training labels must be 'float32'
        features = train_set[0][:200]
        labels = train_set[1][:200].astype("float32")
        knn.fit(knn.record_set(features, labels), wait=False)
        training_job_name = knn.latest_training_job.name

        print("Detached from training job. Will re-attach in 20 seconds")
        time.sleep(20)
        print("attaching now...")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = KNN.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        model = KNNModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for rec in result:
            assert rec.label["score"] is not None
def test_randomcutforest(sagemaker_session):
    """Train RandomCutForest on random data, deploy it and score one point.

    The single prediction record must carry a ``score`` label holding exactly
    one float32 value.
    """
    with timeout(minutes=15):
        # Generate a thousand 14-dimensional datapoints.
        feature_num = 14
        train_input = np.random.rand(1000, feature_num)

        rcf = RandomCutForest(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            num_trees=50,
            num_samples_per_tree=20,
            sagemaker_session=sagemaker_session,
            base_job_name="test-randomcutforest",
        )
        rcf.fit(rcf.record_set(train_input))

    endpoint_name = name_from_base("randomcutforest")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        model = RandomCutForestModel(
            rcf.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)

        result = predictor.predict(np.random.rand(1, feature_num))

        assert len(result) == 1
        for rec in result:
            score = rec.label["score"]
            assert score is not None
            assert len(rec.label["score"].float32_tensor.values) == 1
def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
    """Tune a generic ``Estimator``, using Factorization Machines as the example.

    The ``dummy_tensor`` data is uploaded to S3 and used for both the
    ``train`` and ``test`` channels. An ``Estimator`` is created from the
    retrieved image URI, its hyperparameters are set as required by the
    algorithm, and ``mini_batch_size`` is tuned. The tuner is then deployed
    with a custom serializer and a JSON deserializer, and predictions are
    requested from the endpoint.
    """
    image_uri = image_uris.retrieve("factorization-machines", sagemaker_session.boto_region_name)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        # S3 keys are "/"-separated regardless of the local OS, so the prefix
        # is joined explicitly instead of with os.path.join.
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix="/".join([prefix, "train", key])
        )

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )
        estimator.set_hyperparameters(
            num_factors=10,
            feature_dim=784,
            mini_batch_size=100,
            predictor_type="binary_classifier",
        )

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("byo", 32)
        print("Started hyperparameter tuning job with name {}:".format(tuning_job_name))
        tuner.fit(
            {"train": s3_train_data, "test": s3_train_data},
            include_cls_metadata=False,
            job_name=tuning_job_name,
        )

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(
            1,
            cpu_instance_type,
            endpoint_name=best_training_job,
            serializer=_FactorizationMachineSerializer(),
            deserializer=JSONDeserializer(),
        )

        result = predictor.predict(datasets.one_p_mnist()[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
def test_async_linear_learner(sagemaker_session, cpu_instance_type):
    """Start LinearLearner training without waiting, re-attach and deploy.

    The first 200 MNIST labels are overwritten with a binary target, a wide
    set of LinearLearner attributes is assigned, and training is launched
    with ``wait=False``. After a 20 second pause the job is re-attached by
    name, deployed, and 100 predictions are checked for ``predicted_label``
    and ``score``.
    """
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Load the data into memory as numpy arrays
        with gzip.open(mnist_path, "rb") as handle:
            train_set, _, _ = pickle.load(handle, **pickle_args)

        # Overwrite the labels in place before converting them to float32.
        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        features = train_set[0]
        labels = train_set[1].astype(np.dtype("float32"))
        train_set = features, labels

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )

        # Assigned in this exact order, one attribute at a time.
        attribute_values = (
            ("binary_classifier_model_selection_criteria", "accuracy"),
            ("target_recall", 0.5),
            ("target_precision", 0.5),
            ("positive_example_weight_mult", 0.1),
            ("epochs", 1),
            ("use_bias", True),
            ("num_models", 1),
            ("num_calibration_samples", 1),
            ("init_method", "uniform"),
            ("init_scale", 0.5),
            ("init_sigma", 0.2),
            ("init_bias", 5),
            ("optimizer", "adam"),
            ("loss", "logistic"),
            ("wd", 0.5),
            ("l1", 0.5),
            ("momentum", 0.5),
            ("learning_rate", 0.1),
            ("beta_1", 0.1),
            ("beta_2", 0.1),
            ("use_lr_scheduler", True),
            ("lr_scheduler_step", 2),
            ("lr_scheduler_factor", 0.5),
            ("lr_scheduler_minimum_lr", 0.1),
            ("normalize_data", False),
            ("normalize_label", False),
            ("unbias_data", True),
            ("unbias_label", False),
            ("num_point_for_scaler", 10000),
            ("margin", 1.0),
            ("quantile", 0.5),
            ("loss_insensitivity", 0.1),
            ("huber_delta", 0.1),
            ("early_stopping_tolerance", 0.0001),
            ("early_stopping_patience", 3),
        )
        for attr_name, attr_value in attribute_values:
            setattr(ll, attr_name, attr_value)

        records = ll.record_set(train_set[0][:200], train_set[1][:200])
        ll.fit(records, wait=False, job_name=job_name)

        print("Waiting to re-attach to the training job: %s" % job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        attached = LinearLearner.attach(
            training_job_name=job_name, sagemaker_session=sagemaker_session
        )
        model = LinearLearnerModel(
            attached.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for rec in result:
            assert rec.label["predicted_label"] is not None
            assert rec.label["score"] is not None
def test_tuning_kmeans(sagemaker_session):
    """Tune a KMeans estimator on a slice of MNIST and query the best model.

    The sweep covers ``extra_center_factor``, ``mini_batch_size``, ``epochs``
    and ``init_method`` and minimizes ``test:msd``. After deployment, ten
    predictions are checked for ``closest_cluster`` and
    ``distance_to_cluster`` labels.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10,
                        sagemaker_session=sagemaker_session,
                        base_job_name='tk',
                        output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

        # set kmeans specific hp
        kmeans.init_method = 'random'
        # NOTE(review): this was spelled `max_iterators`, which presumably
        # only created an unused attribute; `max_iterations` is assumed to be
        # the intended KMeans hyperparameter -- confirm against the estimator.
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'extra_center_factor': IntegerParameter(1, 10),
            'mini_batch_size': IntegerParameter(10, 100),
            'epochs': IntegerParameter(1, 2),
            'init_method': CategoricalParameter(['kmeans++', 'random']),
        }
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(estimator=kmeans,
                                    objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges,
                                    objective_type='Minimize',
                                    max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
def test_inference_pipeline_model_deploy_with_update_endpoint(
        sagemaker_session, cpu_instance_type, alternative_cpu_instance_type):
    """Deploy a SparkML + XGBoost pipeline model, then update its endpoint.

    The pipeline is first deployed on ``alternative_cpu_instance_type`` and
    then redeployed with ``update_endpoint=True`` on ``cpu_instance_type``.
    Once the endpoint reports ``InService`` the test checks that a new
    endpoint config is in use with the expected instance type and count, and
    finally that the deleted model can no longer be described.

    Raises:
        Exception: if the endpoint is not ``InService`` after 40 polls.
    """
    sparkml_data_path = os.path.join(DATA_DIR, "sparkml_model")
    xgboost_data_path = os.path.join(DATA_DIR, "xgboost_model")
    endpoint_name = "test-inference-pipeline-deploy-{}".format(sagemaker_timestamp())
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, "mleap_model.tar.gz"),
        key_prefix="integ-test-data/sparkml/model",
    )
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(
            model_data=sparkml_model_data,
            env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
            sagemaker_session=sagemaker_session,
        )
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, "xgboost")
        xgb_model = Model(
            model_data=xgb_model_data, image=xgb_image, sagemaker_session=sagemaker_session
        )
        model = PipelineModel(
            models=[sparkml_model, xgb_model],
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
        )
        model.deploy(1, alternative_cpu_instance_type, endpoint_name=endpoint_name)
        old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name)
        old_config_name = old_endpoint["EndpointConfigName"]

        model.deploy(1, cpu_instance_type, update_endpoint=True, endpoint_name=endpoint_name)

        # Wait for endpoint to finish updating
        # Endpoint update takes ~7min. 40 retries * 30s sleeps = 20min timeout
        max_retry_count = 40
        for _ in range(max_retry_count):
            time.sleep(30)
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=endpoint_name)
            if new_endpoint["EndpointStatus"] == "InService":
                break
        else:
            # Every poll finished without the endpoint becoming InService.
            raise Exception("Endpoint status not 'InService' within expected timeout.")

        new_config_name = new_endpoint["EndpointConfigName"]
        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name)

        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1

    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
    # Checked after the block: a statement placed after the raising call
    # inside `pytest.raises` never runs.
    assert "Could not find model" in str(exception.value)
def test_byo_estimator(sagemaker_session, region):
    """Train and deploy a generic ``Estimator``, using Factorization Machines.

    A slice of the MNIST data set is converted to the dense-tensor record
    format and uploaded to S3. The ``Estimator`` is created with the
    algorithm image, its hyperparameters are set as required by the
    algorithm, and ``fit()`` is called with the S3 location. The trained
    model is then deployed and the predictor is switched to a JSON
    serializer/deserializer before predictions are requested.
    """
    image_name = registry(region) + "/factorization-machines:1"

    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        # Binary target: 1.0 where the original label is 0, else 0.0.
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0,
                          1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        # Build the object key with "/" so it always matches the URI below;
        # os.path.join would use the local OS separator.
        object_key = '{}/train/{}'.format(prefix, key)
        boto3.resource('s3').Bucket(bucket).Object(object_key).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}'.format(bucket, object_key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    """Tune LDA with early stopping, re-attach to the job and deploy.

    After fitting, the tuning job is re-attached by name and the attached
    tuner is checked for ``early_stopping_type == "Auto"`` and for the
    estimator's ``alpha0``/``num_topics`` values. The original tuner is then
    deployed and its single prediction checked for a ``topic_mixture`` label.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        lda_dir = os.path.join(DATA_DIR, "lda")
        records_file = os.path.join(lda_dir, "nips-train_1.pbr")

        with open(records_file, "rb") as handle:
            all_records = read_records(handle)

        # all records must be same
        first_values = all_records[0].features["values"]
        feature_num = int(first_values.float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        def _build_record_set():
            # The train and test channels are built from the same local files.
            return prepare_record_set_from_local_files(
                lda_dir, lda.data_location, len(all_records), feature_num, sagemaker_session
            )

        train_records = _build_record_set()
        test_records = _build_record_set()
        test_records.channel = "test"

        # specify which hp you want to optimize over
        search_space = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name="test:pwll",
            hyperparameter_ranges=search_space,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit(
            [train_records, test_records], mini_batch_size=1, job_name=tuning_job_name
        )

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session
    )
    assert attached_tuner.early_stopping_type == "Auto"
    attached_estimator = attached_tuner.estimator
    assert attached_estimator.alpha0 == 1.0
    assert attached_tuner.estimator.num_topics == 1

    best_job = attached_tuner.best_training_job()

    with timeout_and_delete_endpoint_by_name(best_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        result = predictor.predict(np.random.rand(1, feature_num))

        assert len(result) == 1
        for rec in result:
            assert rec.label["topic_mixture"] is not None
def test_multi_data_model_deploy_pretrained_models_update_endpoint(
        container_image, sagemaker_session, cpu_instance_type, alternative_cpu_instance_type):
    """Deploy a multi-model endpoint with pretrained models, then update it.

    One model is added before deployment and one after; both must be listed
    and individually invocable via ``target_model``. The endpoint is then
    redeployed with ``update_endpoint=True`` on
    ``alternative_cpu_instance_type`` and the new endpoint config verified.
    Finally both endpoint configs and the model are deleted, and each must
    fail to be described afterwards.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(
        DATA_DIR, "sparkml_model", "mleap_model.tar.gz")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # S3 URIs are "/"-separated on every platform, so the prefix is
        # formatted directly instead of being built with os.path.join.
        model_data_prefix = "s3://{}/multimodel-{}/".format(
            sagemaker_session.default_bucket(), timestamp)
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Add model after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name)
        old_config_name = old_endpoint["EndpointConfigName"]

        # Update endpoint
        multi_data_model.deploy(1, alternative_cpu_instance_type,
                                endpoint_name=endpoint_name, update_endpoint=True)

        # Wait for endpoint to finish updating
        for _ in retries(40, "Waiting for 'InService' endpoint status", seconds_to_sleep=30):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=endpoint_name)
            if new_endpoint["EndpointStatus"] == "InService":
                break

        new_config_name = new_endpoint["EndpointConfigName"]
        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name)

        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == \
            alternative_cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1

    # Cleanup
    sagemaker_session.sagemaker_client.delete_endpoint_config(
        EndpointConfigName=old_config_name)
    sagemaker_session.sagemaker_client.delete_endpoint_config(
        EndpointConfigName=new_config_name)
    multi_data_model.delete_model()

    # Each lookup gets its own `pytest.raises` block with the message check
    # placed after it: statements following the raising call inside the block
    # never run, so the combined form only exercised the first lookup.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    # `EndpointConfigName` matches the keyword used by the other
    # describe/delete endpoint-config calls in this test.
    for config_name in (old_config_name, new_config_name):
        with pytest.raises(Exception) as exception:
            sagemaker_session.sagemaker_client.describe_endpoint_config(
                EndpointConfigName=config_name)
        assert "Could not find endpoint" in str(exception.value)
def test_tuning_lda(sagemaker_session):
    """Tune LDA with early stopping and verify the job config before deploying.

    Once the tuning job has finished, its description is fetched through the
    SageMaker client and ``TrainingJobEarlyStoppingType`` must be ``Auto``.
    The tuner is then deployed and its single prediction checked for a
    ``topic_mixture`` label.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        lda_dir = os.path.join(DATA_DIR, "lda")
        records_file = os.path.join(lda_dir, "nips-train_1.pbr")

        with open(records_file, "rb") as handle:
            all_records = read_records(handle)

        # all records must be same
        first_values = all_records[0].features["values"]
        feature_num = int(first_values.float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            train_instance_type="ml.c4.xlarge",
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        def _build_record_set():
            # The train and test channels are built from the same local files.
            return prepare_record_set_from_local_files(
                lda_dir, lda.data_location, len(all_records), feature_num, sagemaker_session
            )

        train_records = _build_record_set()
        test_records = _build_record_set()
        test_records.channel = "test"

        # specify which hp you want to optimize over
        search_space = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name="test:pwll",
            hyperparameter_ranges=search_space,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        requested_job_name = unique_name_from_base("test-lda", max_length=32)
        tuner.fit(
            [train_records, test_records], mini_batch_size=1, job_name=requested_job_name
        )

        latest_job = tuner.latest_tuning_job
        latest_tuning_job_name = latest_job.name

        print("Started hyperparameter tuning job with name:" + latest_tuning_job_name)

        time.sleep(15)
        tuner.wait()

    sm_client = tuner.latest_tuning_job.sagemaker_session.sagemaker_client
    desc = sm_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=latest_tuning_job_name
    )
    job_config = desc["HyperParameterTuningJobConfig"]
    assert job_config["TrainingJobEarlyStoppingType"] == "Auto"

    best_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_job, sagemaker_session):
        predictor = tuner.deploy(1, "ml.c4.xlarge")
        result = predictor.predict(np.random.rand(1, feature_num))

        assert len(result) == 1
        for rec in result:
            assert rec.label["topic_mixture"] is not None
def test_tuning_chainer(sagemaker_session):
    """Tune a Chainer MNIST estimator on ``alpha`` and query three input shapes.

    After a two-job sweep with ``Validation-accuracy`` as the objective, the
    deployed predictor is called with zero-filled batches shaped
    ``(100, 784)``, ``(100, 1, 28, 28)`` and ``(100, 28, 28)``; each call
    must return 100 outputs.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        chainer_dir = os.path.join(DATA_DIR, "chainer_mnist")
        entry_point = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")

        estimator = Chainer(
            entry_point=entry_point,
            role="SageMakerRole",
            py_version=PYTHON_VERSION,
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(chainer_dir, "train"),
            key_prefix="integ-test-data/chainer_mnist/train",
        )
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(chainer_dir, "test"),
            key_prefix="integ-test-data/chainer_mnist/test",
        )

        search_space = {"alpha": ContinuousParameter(0.001, 0.005)}
        metric_name = "Validation-accuracy"
        metrics = [
            {
                "Name": "Validation-accuracy",
                "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
            }
        ]

        tuner = HyperparameterTuner(
            estimator,
            metric_name,
            search_space,
            metrics,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        channels = {"train": train_input, "test": test_input}
        tuner.fit(channels, job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    best_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_job, sagemaker_session):
        predictor = tuner.deploy(1, "ml.c4.xlarge")

        batch_size = 100
        # Flat, channel-first and plain 2-D image layouts, in that order.
        input_shapes = (
            (batch_size, 784),
            (batch_size, 1, 28, 28),
            (batch_size, 28, 28),
        )
        for shape in input_shapes:
            data = np.zeros(shape, dtype="float32")
            output = predictor.predict(data)
            assert len(output) == batch_size
def test_tuning_byo_estimator(sagemaker_session):
    """Tune a generic ``Estimator``, using Factorization Machines as the example.

    The ``dummy_tensor`` data is uploaded to S3 and used for both the
    ``train`` and ``test`` channels. An ``Estimator`` is created with the
    algorithm image, its hyperparameters are set as required by the
    algorithm, and ``mini_batch_size`` is tuned. The tuner is then deployed,
    the predictor is switched to a JSON serializer/deserializer, and
    predictions are requested for ten MNIST samples.
    """
    image_name = registry(
        sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # The MNIST samples are only used as prediction input further down.
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        # S3 keys are "/"-separated regardless of the local OS, so the prefix
        # is joined explicitly instead of with os.path.join.
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix='/'.join([prefix, 'train', key]))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session)

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name='test:binary_classification_accuracy',
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit({'train': s3_train_data, 'test': s3_train_data},
                  include_cls_metadata=False,
                  job_name=unique_name_from_base('byo', 32))

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.m4.xlarge', endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
def test_tf_vpc_multi(sagemaker_session, tf_full_version):
    """Test Tensorflow multi-instance using the same VpcConfig for training and inference"""
    instance_type = "ml.c4.xlarge"
    instance_count = 2

    iris_dir = os.path.join(DATA_DIR, "iris")
    train_input = sagemaker_session.upload_data(
        path=os.path.join(iris_dir, "data"), key_prefix="integ-test-data/tf_iris"
    )
    script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py")

    boto_session = sagemaker_session.boto_session
    ec2_client = boto_session.client("ec2")
    subnet_ids, security_group_id = get_or_create_vpc_resources(
        ec2_client, sagemaker_session.boto_session.region_name
    )

    setup_security_group_for_encryption(ec2_client, security_group_id)

    estimator = TensorFlow(
        entry_point=script_path,
        role="SageMakerRole",
        framework_version=tf_full_version,
        training_steps=1,
        evaluation_steps=1,
        hyperparameters={"input_tensor_name": "inputs"},
        train_instance_count=instance_count,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-vpc-tf",
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
        encrypt_inter_container_traffic=True,
    )
    job_name = unique_name_from_base("test-tf-vpc-multi")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(train_input, job_name=job_name)
        print("training job succeeded: {}".format(estimator.latest_training_job.name))

    sm_client = sagemaker_session.sagemaker_client

    # The training job must carry the VPC settings passed to the estimator.
    job_desc = sm_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name
    )
    job_vpc = job_desc["VpcConfig"]
    assert set(subnet_ids) == set(job_vpc["Subnets"])
    assert [security_group_id] == job_desc["VpcConfig"]["SecurityGroupIds"]
    assert job_desc["EnableInterContainerTrafficEncryption"] is True

    endpoint_name = estimator.latest_training_job.name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        json_predictor = model.deploy(
            initial_instance_count=instance_count,
            instance_type="ml.c4.xlarge",
            endpoint_name=endpoint_name,
        )

        features = [6.4, 3.2, 4.5, 1.5]

        dict_result = json_predictor.predict({"inputs": features})
        print("predict result: {}".format(dict_result))

        list_result = json_predictor.predict(features)
        print("predict result: {}".format(list_result))

        assert dict_result == list_result

        # The created model must carry the same VPC settings.
        model_desc = sm_client.describe_model(ModelName=model.name)
        model_vpc = model_desc["VpcConfig"]
        assert set(subnet_ids) == set(model_vpc["Subnets"])
        assert [security_group_id] == model_desc["VpcConfig"]["SecurityGroupIds"]
def test_attach_tuning_pytorch(
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    """Tune a PyTorch MNIST estimator, re-attach to the job and deploy twice.

    The attached tuner must report ``early_stopping_type == "Auto"``. It is
    deployed once under the tuning job name with an explicit model name, and
    once under the best training job name; the second endpoint is queried
    with a single sample and a batch of 100, whose output must have shape
    ``(100, 10)``.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(mnist_dir, "mnist.py")

    estimator = PyTorch(
        entry_point=entry_point,
        role="SageMakerRole",
        instance_count=1,
        framework_version=pytorch_inference_latest_version,
        py_version=pytorch_inference_latest_py_version,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        metric_name = "evaluation-accuracy"
        metrics = [
            {"Name": "evaluation-accuracy", "Regex": r"Overall test accuracy: (\d+)"}
        ]
        search_space = {"batch-size": IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(
            estimator,
            metric_name,
            search_space,
            metrics,
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "training"),
            key_prefix="integ-test-data/pytorch_mnist/training",
        )

        tuning_job_name = unique_name_from_base("pytorch", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(tuning_job_name))
        tuner.fit({"training": training_data}, job_name=tuning_job_name)

    endpoint_name = tuning_job_name
    model_name = "model-name-1"
    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session
    )
    assert attached_tuner.early_stopping_type == "Auto"

    # First deployment: explicit endpoint and model names.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = attached_tuner.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name, model_name=model_name
        )

    best_job = tuner.best_training_job()
    # Second deployment: default names, guarded by the best training job name.
    with timeout_and_delete_endpoint_by_name(best_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, cpu_instance_type)

        single_sample = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(single_sample)

        batch_size = 100
        batch = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(batch)

        assert output.shape == (batch_size, 10)
        _assert_model_name_match(
            sagemaker_session.sagemaker_client, endpoint_name, model_name
        )
def test_multi_data_model_deploy_trained_model_from_framework_estimator(
    container_image,
    sagemaker_session,
    cpu_instance_type,
    mxnet_inference_latest_version,
    mxnet_inference_latest_py_version,
):
    """Deploy a multi-model endpoint from two MXNet training jobs and invoke both.

    One model is added before the endpoint is deployed and a second one
    afterwards.  Both must be listed by the ``MultiDataModel`` and be invokable
    through ``target_model``.  After cleanup, describing the model and the
    endpoint configuration must each raise with the expected message.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        mxnet_model_1 = _mxnet_training_job(
            sagemaker_session,
            container_image,
            mxnet_inference_latest_version,
            mxnet_inference_latest_py_version,
            cpu_instance_type,
            0.1,
        )
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=mxnet_model_1,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(mxnet_model_1.model_data, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        # Train another model
        mxnet_model_2 = _mxnet_training_job(
            sagemaker_session,
            container_image,
            mxnet_inference_latest_version,
            mxnet_inference_latest_py_version,
            cpu_instance_type,
            0.01,
        )
        # Deploy newly trained model
        multi_data_model.add_model(mxnet_model_2.model_data, PRETRAINED_MODEL_PATH_2)

        # Both artifacts must be listed by the multi-model container.
        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        # Define a predictor to set the `serializer` parameter with `NumpySerializer`
        # instead of the default predictor's serializer.  Per the original note,
        # a placeholder container image is used, so prediction results are not
        # accurate; only the "Invoked model" echo is checked.
        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        # Prediction result for the first model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        # Prediction result for the second model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.delete_model()

    # Each lookup gets its own `pytest.raises` block, and the message checks sit
    # after the block: statements following the raising call inside a
    # `pytest.raises` context are never executed.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    # `EndpointConfigName` is the keyword the DescribeEndpointConfig call expects.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_linear_learner():
    """Train a LinearLearner binary classifier on a small MNIST slice and deploy it.

    The test builds its own session for ``REGION``, relabels the first 200
    samples as two classes, sets the hyperparameters attribute by attribute,
    fits, deploys the resulting model and checks that every prediction record
    carries a ``predicted_label`` and a ``score``.
    """
    with timeout(minutes=15):
        sagemaker_session = sagemaker.Session(
            boto_session=boto3.Session(region_name=REGION)
        )
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # Turn the first 200 labels into a two-class problem.
        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner(
            'SageMakerRole',
            1,
            'ml.c4.2xlarge',
            base_job_name='test-linear-learner',
            sagemaker_session=sagemaker_session,
        )
        ll.binary_classifier_model_selection_criteria = 'accuracy'
        # Was misspelled `target_reacall`, which only created a stray attribute.
        ll.target_recall = 0.5
        ll.target_precision = 0.5
        ll.positive_example_weight_mult = 0.1
        ll.epochs = 1
        ll.predictor_type = 'binary_classifier'
        ll.use_bias = True
        ll.num_models = 1
        ll.num_calibration_samples = 1
        ll.init_method = 'uniform'
        ll.init_scale = 0.5
        ll.init_sigma = 0.2
        ll.init_bias = 5
        ll.optimizer = 'adam'
        ll.loss = 'logistic'
        ll.wd = 0.5
        ll.l1 = 0.5
        ll.momentum = 0.5
        ll.learning_rate = 0.1
        ll.beta_1 = 0.1
        ll.beta_2 = 0.1
        ll.use_lr_scheduler = True
        ll.lr_scheduler_step = 2
        ll.lr_scheduler_factor = 0.5
        ll.lr_scheduler_minimum_lr = 0.1
        ll.normalize_data = False
        ll.normalize_label = False
        ll.unbias_data = True
        ll.unbias_label = False
        # Was misspelled `num_point_for_scala`.
        ll.num_point_for_scaler = 10000
        ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200]))

    endpoint_name = name_from_base('linear-learner')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        model = LinearLearnerModel(
            ll.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100
        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_tuning_chainer(sagemaker_session, chainer_latest_version,
                        chainer_latest_py_version, cpu_instance_type):
    """Tune a Chainer MNIST estimator over ``alpha`` and deploy from the tuner.

    After the two-job tuning run, the tuner is deployed and queried with three
    differently shaped all-zero batches; each response must contain one entry
    per sample.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        chainer_dir = os.path.join(DATA_DIR, "chainer_mnist")

        chainer_estimator = Chainer(
            entry_point=os.path.join(DATA_DIR, "chainer_mnist", "mnist.py"),
            role="SageMakerRole",
            framework_version=chainer_latest_version,
            py_version=chainer_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        upload = chainer_estimator.sagemaker_session.upload_data
        train_input = upload(
            path=os.path.join(chainer_dir, "train"),
            key_prefix="integ-test-data/chainer_mnist/train",
        )
        test_input = upload(
            path=os.path.join(chainer_dir, "test"),
            key_prefix="integ-test-data/chainer_mnist/test",
        )

        search_space = {"alpha": ContinuousParameter(0.001, 0.005)}
        metric_name = "Validation-accuracy"
        metrics = [
            {
                "Name": "Validation-accuracy",
                "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
            }
        ]

        tuner = HyperparameterTuner(
            chainer_estimator,
            metric_name,
            search_space,
            metrics,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(tuning_job_name))
        channels = {"train": train_input, "test": test_input}
        tuner.fit(channels, job_name=tuning_job_name)

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)

        batch_size = 100
        # Flat, channel-first and plain 2-D image layouts, in the original order.
        for sample_shape in ((784,), (1, 28, 28), (28, 28)):
            data = np.zeros((batch_size,) + sample_shape, dtype="float32")
            output = predictor.predict(data)
            assert len(output) == batch_size
def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
        container_image, sagemaker_session, cpu_instance_type):
    """Deploy a multi-model endpoint from two ``__rcf_training_job`` models.

    One model is added before deployment and another afterwards.  Both must be
    listed and invokable through ``target_model``.  After cleanup, describing
    the model and the endpoint configuration must each raise with the expected
    message.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        rcf_model_v1 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 50, 20
        )

        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=rcf_model_v1,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(rcf_model_v1.model_data, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        # Train another model
        rcf_model_v2 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 70, 20
        )
        # Deploy newly trained model
        multi_data_model.add_model(rcf_model_v2.model_data, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        # Build the predictor explicitly so requests go through `npy_serializer`
        # and responses through `string_deserializer`.  Per the original note,
        # a placeholder container image is used, so prediction results are not
        # accurate; only the "Invoked model" echo is checked.
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )

        data = numpy.random.rand(1, 14)
        # Prediction result for the first model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        # Prediction result for the second model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.delete_model()

    # Each lookup gets its own `pytest.raises` block, and the message checks sit
    # after the block: statements following the raising call inside a
    # `pytest.raises` context are never executed.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    # `EndpointConfigName` is the keyword the DescribeEndpointConfig call expects.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_async_linear_learner(sagemaker_session):
    """Start LinearLearner training without waiting, re-attach, then deploy.

    ``fit(..., wait=False)`` launches the job; after a short sleep the job is
    re-attached by name via ``LinearLearner.attach`` and the attached
    estimator's model data is deployed.  Every prediction record must carry a
    ``predicted_label`` and a ``score``.
    """
    training_job_name = ""
    endpoint_name = 'test-linear-learner-async-{}'.format(sagemaker_timestamp())

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # Relabel the first 200 samples as two classes, then cast labels to float32.
        train_set[1][:100] = 1
        train_set[1][100:200] = 0
        train_set = train_set[0], train_set[1].astype(np.dtype('float32'))

        ll = LinearLearner(
            'SageMakerRole',
            1,
            'ml.c4.2xlarge',
            base_job_name='test-linear-learner',
            predictor_type='binary_classifier',
            sagemaker_session=sagemaker_session,
        )

        # Applied in this exact order, one attribute assignment each.
        hyperparameter_settings = (
            ('binary_classifier_model_selection_criteria', 'accuracy'),
            ('target_recall', 0.5),
            ('target_precision', 0.5),
            ('positive_example_weight_mult', 0.1),
            ('epochs', 1),
            ('use_bias', True),
            ('num_models', 1),
            ('num_calibration_samples', 1),
            ('init_method', 'uniform'),
            ('init_scale', 0.5),
            ('init_sigma', 0.2),
            ('init_bias', 5),
            ('optimizer', 'adam'),
            ('loss', 'logistic'),
            ('wd', 0.5),
            ('l1', 0.5),
            ('momentum', 0.5),
            ('learning_rate', 0.1),
            ('beta_1', 0.1),
            ('beta_2', 0.1),
            ('use_lr_scheduler', True),
            ('lr_scheduler_step', 2),
            ('lr_scheduler_factor', 0.5),
            ('lr_scheduler_minimum_lr', 0.1),
            ('normalize_data', False),
            ('normalize_label', False),
            ('unbias_data', True),
            ('unbias_label', False),
            ('num_point_for_scaler', 10000),
            ('margin', 1.0),
            ('quantile', 0.5),
            ('loss_insensitivity', 0.1),
            ('huber_delta', 0.1),
            ('early_stopping_tolerance', 0.0001),
            ('early_stopping_patience', 3),
        )
        for attribute, value in hyperparameter_settings:
            setattr(ll, attribute, value)

        records = ll.record_set(train_set[0][:200], train_set[1][:200])
        ll.fit(records, wait=False)
        training_job_name = ll.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = LinearLearner.attach(
            training_job_name=training_job_name,
            sagemaker_session=sagemaker_session,
        )
        model = LinearLearnerModel(
            estimator.model_data,
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name)

        result = predictor.predict(train_set[0][0:100])
        assert len(result) == 100

        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_byo_estimator(sagemaker_session, region):
    """Train and deploy a generic ``Estimator`` using the Factorization Machines image.

    The ``dummy_tensor`` fixture directory is uploaded to S3 as the training
    channel, the estimator is created from the algorithm image, the
    hyperparameters required by the algorithm are set, and ``fit()`` is called
    with the S3 path.  The trained model is then deployed and the predictor is
    switched to a JSON serializer and deserializer before prediction is called
    against the endpoint.
    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {'encoding': 'latin1'}

        # Only the training split of the pickled data set is used (for prediction).
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        train_key_prefix = os.path.join(prefix, 'train', key)

        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path,
            key_prefix=train_key_prefix,
        )

        estimator = Estimator(
            image_name=image_name,
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-byo',
        )
        estimator.set_hyperparameters(
            num_factors=10,
            feature_dim=784,
            mini_batch_size=100,
            predictor_type='binary_classifier',
        )

        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        # Replace the default (de)serialization with JSON in both directions.
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])
        predictions = result['predictions']

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction['score'] is not None
def test_inference_pipeline_model_deploy(sagemaker_session):
    """Deploy a SparkML + XGBoost ``PipelineModel`` and query it with CSV rows.

    Both model archives are uploaded to S3, chained into one ``PipelineModel``
    and deployed.  A well-formed CSV row must produce the expected score string
    and a row with misordered columns must produce ``None``.  After the model
    is deleted, describing it must raise with the expected message.
    """
    sparkml_data_path = os.path.join(DATA_DIR, 'sparkml_model')
    xgboost_data_path = os.path.join(DATA_DIR, 'xgboost_model')
    endpoint_name = 'test-inference-pipeline-deploy-{}'.format(sagemaker_timestamp())

    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model',
    )
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model',
    )

    # Column schema handed to the SparkML container through its environment.
    schema = json.dumps({
        "input": [
            {"name": "Pclass", "type": "float"},
            {"name": "Embarked", "type": "string"},
            {"name": "Age", "type": "float"},
            {"name": "Fare", "type": "float"},
            {"name": "SibSp", "type": "float"},
            {"name": "Sex", "type": "string"},
        ],
        "output": {
            "name": "features",
            "struct": "vector",
            "type": "double",
        },
    })

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(
            model_data=sparkml_model_data,
            env={'SAGEMAKER_SPARKML_SCHEMA': schema},
            sagemaker_session=sagemaker_session,
        )
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, 'xgboost')
        xgb_model = Model(
            model_data=xgb_model_data,
            image=xgb_image,
            sagemaker_session=sagemaker_session,
        )
        model = PipelineModel(
            models=[sparkml_model, xgb_model],
            role='SageMakerRole',
            sagemaker_session=sagemaker_session,
            name=endpoint_name,
        )
        model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=json_serializer,
            content_type=CONTENT_TYPE_CSV,
            accept=CONTENT_TYPE_CSV,
        )

        valid_data = '1.0,C,38.0,71.5,1.0,female'
        assert predictor.predict(valid_data) == "0.714013934135"

        # Columns are out of order with respect to the schema.
        invalid_data = "1.0,28.0,C,38.0,71.5,1.0"
        assert (predictor.predict(invalid_data) is None)

    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
    # Checked after the block: inside the `pytest.raises` context nothing
    # following the raising call would ever execute.
    assert 'Could not find model' in str(exception.value)
def test_async_walkthrough(sagemaker_session, cpu_instance_type, training_set):
    """Train PCA, deploy it with an async inference config and exercise both call styles.

    The predictor is queried four ways: ``predict_async`` and ``predict`` with
    in-memory data, then ``predict_async`` and ``predict`` with an S3 input
    path.  Each response must hold five records with a ``projection`` label,
    and the blocking result must match the non-blocking one record by record.
    """
    job_name = unique_name_from_base("pca")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        pca = sagemaker.amazon.pca.PCA(
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            num_components=48,
            sagemaker_session=sagemaker_session,
        )
        pca.algorithm_mode = "randomized"
        pca.subtract_mean = True
        pca.extra_components = 5

        pca.fit(pca.record_set(training_set[0][:100]), job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor_async = pca.deploy(
            endpoint_name=job_name,
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            async_inference_config=AsyncInferenceConfig(),
        )
        assert isinstance(predictor_async, AsyncPredictor)

        # --- in-memory payload, non-blocking call --------------------------
        data = training_set[0][:5]
        pending = predictor_async.predict_async(data=data)
        assert isinstance(pending, AsyncInferenceResponse)
        assert pending.output_path.startswith(
            "s3://" + sagemaker_session.default_bucket()
        )
        time.sleep(5)
        result_no_wait_with_data = pending.get_result()
        assert len(result_no_wait_with_data) == 5
        for record in result_no_wait_with_data:
            assert record.label["projection"] is not None

        # --- in-memory payload, blocking call ------------------------------
        result_wait_with_data = predictor_async.predict(data=data)
        assert len(result_wait_with_data) == 5
        for blocking, polled in zip(result_wait_with_data, result_no_wait_with_data):
            assert blocking.label["projection"] is not None
            assert blocking.label["projection"] == polled.label["projection"]

        # --- payload staged in S3 ------------------------------------------
        s3_key_prefix = os.path.join("integ-test-test-async-inference", job_name)
        input_s3_path = os.path.join(
            "s3://",
            sagemaker_session.default_bucket(),
            s3_key_prefix,
            "async-inference-pca-input.csv",
        )
        sagemaker_session.upload_data(
            path=INPUT_LOCAL_PATH,
            bucket=sagemaker_session.default_bucket(),
            key_prefix=s3_key_prefix,
            extra_args={"ContentType": "text/csv"},
        )

        pending = predictor_async.predict_async(input_path=input_s3_path)
        assert isinstance(pending, AsyncInferenceResponse)
        assert pending.output_path.startswith(
            "s3://" + sagemaker_session.default_bucket()
        )
        time.sleep(5)
        result_not_wait = pending.get_result()
        assert len(result_not_wait) == 5
        for record in result_not_wait:
            assert record.label["projection"] is not None

        result_wait = predictor_async.predict(input_path=input_s3_path)
        assert len(result_wait) == 5
        for blocking, polled in zip(result_wait, result_not_wait):
            assert blocking.label["projection"] is not None
            assert blocking.label["projection"] == polled.label["projection"]
def test_multi_data_model_deploy_pretrained_models(container_image,
                                                   sagemaker_session,
                                                   cpu_instance_type):
    """Deploy a multi-model endpoint from a local pretrained archive and invoke it.

    The same local archive is registered under two model paths, one before and
    one after deployment.  Both must be listed and invokable through
    ``target_model``.  After cleanup, describing the model and the endpoint
    configuration must each raise with the expected message.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(
        DATA_DIR, "sparkml_model", "mleap_model.tar.gz"
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Add models after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.delete_model()

    # Each lookup gets its own `pytest.raises` block, and the message checks sit
    # after the block: statements following the raising call inside a
    # `pytest.raises` context are never executed.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(
            ModelName=multi_data_model.name
        )
    assert "Could not find model" in str(exception.value)

    # `EndpointConfigName` is the keyword the DescribeEndpointConfig call expects.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_linear_learner(sagemaker_session, cpu_instance_type, training_set):
    """Train a LinearLearner binary classifier on 200 relabelled samples and deploy it.

    The estimator is deployed directly under the training job's name, and every
    prediction record must carry a ``predicted_label`` and a ``score``.
    """
    job_name = unique_name_from_base("linear-learner")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        # Relabel the first 200 samples as two classes (in place on the fixture's
        # label array), then rebuild the pair with float32 labels.
        training_set[1][:100] = 1
        training_set[1][100:200] = 0
        float_labels = training_set[1].astype(np.dtype("float32"))
        training_set = training_set[0], float_labels

        ll = LinearLearner(
            "SageMakerRole",
            1,
            cpu_instance_type,
            predictor_type="binary_classifier",
            sagemaker_session=sagemaker_session,
        )

        # Applied in this exact order, one attribute assignment each.
        hyperparameter_settings = (
            ("binary_classifier_model_selection_criteria", "accuracy"),
            ("target_recall", 0.5),
            ("target_precision", 0.5),
            ("positive_example_weight_mult", 0.1),
            ("epochs", 1),
            ("use_bias", True),
            ("num_models", 1),
            ("num_calibration_samples", 1),
            ("init_method", "uniform"),
            ("init_scale", 0.5),
            ("init_sigma", 0.2),
            ("init_bias", 5),
            ("optimizer", "adam"),
            ("loss", "logistic"),
            ("wd", 0.5),
            ("l1", 0.5),
            ("momentum", 0.5),
            ("learning_rate", 0.1),
            ("beta_1", 0.1),
            ("beta_2", 0.1),
            ("use_lr_scheduler", True),
            ("lr_scheduler_step", 2),
            ("lr_scheduler_factor", 0.5),
            ("lr_scheduler_minimum_lr", 0.1),
            ("normalize_data", False),
            ("normalize_label", False),
            ("unbias_data", True),
            ("unbias_label", False),
            ("num_point_for_scaler", 10000),
            ("margin", 1.0),
            ("quantile", 0.5),
            ("loss_insensitivity", 0.1),
            ("huber_delta", 0.1),
            ("early_stopping_tolerance", 0.0001),
            ("early_stopping_patience", 3),
        )
        for attribute, value in hyperparameter_settings:
            setattr(ll, attribute, value)

        records = ll.record_set(training_set[0][:200], training_set[1][:200])
        ll.fit(records, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = ll.deploy(1, cpu_instance_type, endpoint_name=job_name)

        result = predictor.predict(training_set[0][0:100])
        assert len(result) == 100

        for record in result:
            assert record.label["predicted_label"] is not None
            assert record.label["score"] is not None
def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
    """Tune a generic ``Estimator`` built on the Factorization Machines image.

    The ``dummy_tensor`` fixture directory is uploaded to S3 and used for both
    the ``train`` and ``test`` channels.  The estimator gets the
    hyperparameters required by the algorithm, and a two-job tuner searches
    over ``mini_batch_size``.  The tuner is then deployed under the best
    training job's name and the predictor is switched to a JSON serializer and
    deserializer before prediction is called against the endpoint.
    """
    region_name = sagemaker_session.boto_session.region_name
    image_name = registry(region_name) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        if sys.version_info.major == 2:
            pickle_args = {}
        else:
            pickle_args = {"encoding": "latin1"}

        # Only the training split of the pickled data set is used (for prediction).
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        train_key_prefix = os.path.join(prefix, "train", key)

        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path,
            key_prefix=train_key_prefix,
        )

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )
        estimator.set_hyperparameters(
            num_factors=10,
            feature_dim=784,
            mini_batch_size=100,
            predictor_type="binary_classifier",
        )

        search_space = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=search_space,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        # The same uploaded data serves as both channels.
        channels = {"train": s3_train_data, "test": s3_train_data}
        tuner.fit(
            channels,
            include_cls_metadata=False,
            job_name=unique_name_from_base("byo", 32),
        )

        print("Started hyperparameter tuning job with name:" + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type, endpoint_name=best_training_job)

        # Replace the default (de)serialization with JSON in both directions.
        predictor.serializer = _fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])
        predictions = result["predictions"]

        assert len(predictions) == 10
        for prediction in predictions:
            assert prediction["score"] is not None