def main(args):
    """Train a scikit-learn model on SageMaker and deploy it to an endpoint.

    Args:
        args: parsed CLI namespace; ``args.local`` toggles SageMaker local
            mode (Docker on this machine) vs. managed (cloud) instances.
    """
    print("args.local=", args.local)

    # Initialise SDK
    sklearn_estimator = SKLearn(
        entry_point='src/train_and_deploy.py',
        role=CLOUD_CONFIG['sagemaker_role_id']['value'],
        train_instance_type='local' if args.local else 'ml.m4.xlarge',
        hyperparameters={
            'sagemaker_submit_directory': f"s3://{CLOUD_CONFIG['s3bucket']['value']}",
        },
        framework_version='0.23-1',
        metric_definitions=[{
            'Name': 'train:score',
            # Fix: raw string — '\S' in a plain literal is an invalid escape
            # sequence (DeprecationWarning since Python 3.6, error in future).
            'Regex': r'train:score=(\S+)',
        }],
    )

    # Run model training job (local CSV in local mode, S3 object otherwise)
    sklearn_estimator.fit({
        'train': "file://./data/data.csv" if args.local
        else f"s3://{CLOUD_CONFIG['s3bucket']['value']}/data.csv"
    })

    # Deploy trained model to an endpoint
    sklearn_estimator.deploy(
        instance_type='local' if args.local else 'ml.t2.medium',
        initial_instance_count=1,
        endpoint_name='demo-endpoint',
    )
def main():
    """Train on a Delta Sharing dataset in SageMaker local mode, deploy the
    model to a local endpoint, run one prediction, then delete the endpoint."""
    print('Starting model training.')
    print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')

    estimator = SKLearn(
        entry_point="scikit_boston_housing.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE,
    )

    # Delta Sharing profile file describing the open-datasets share.
    share_profile = "file://./profile/open-datasets.share"
    estimator.fit({"train": share_profile})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = estimator.deploy(initial_instance_count=1, instance_type='local')

    # Single Boston-housing feature row used as a smoke-test payload.
    sample = [[0.00632, 18.0, 2.31, 0, 0.538, 6.575, 65.2, 4.0900,
               1, 296, 15.3, 396.90, 4.98]]
    prediction = predictor.predict(sample)
    print(f'Prediction: {prediction}')

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint(predictor.endpoint_name)
def main():
    """Train the iris classifier in SageMaker local mode, deploy it locally,
    run inference against the endpoint, then tear the endpoint down."""
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    estimator = SKLearn(
        entry_point="scikit_learn_iris.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE,
        hyperparameters={"max_leaf_nodes": 30},
    )
    # Training data was downloaded to ./data by the helper above.
    estimator.fit({"train": "file://./data/iris.csv"})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = estimator.deploy(initial_instance_count=1, instance_type='local')
    do_inference_on_local_endpoint(predictor)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint(predictor.endpoint_name)
def main():
    """Train a CatBoost model via the SKLearn container in local mode, deploy
    it to a local endpoint, score a CSV test payload, then clean up."""
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    sklearn = SKLearn(
        entry_point="catboost_train_deploy.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE,
    )

    # local_train / local_validation / local_test are module-level paths
    # populated by download_training_and_eval_data().
    train_location = 'file://' + local_train
    validation_location = 'file://' + local_validation
    sklearn.fit({'train': train_location, 'validation': validation_location})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = sklearn.deploy(1, 'local', serializer=csv_serializer)

    with open(local_test, 'r') as f:
        payload = f.read().strip()
    predictions = predictor.predict(payload)
    print('predictions: {}'.format(predictions))

    # Fix: use the predictor's `endpoint_name` attribute, as the sibling
    # examples in this file do — `predictor.endpoint` is the deprecated
    # SageMaker SDK v1 spelling.
    predictor.delete_endpoint(predictor.endpoint_name)
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    """Verify fit/create_model/deploy wiring of the SKLearn estimator
    (with an experiment_config) against the mocked SageMaker session."""
    estimator = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"
    estimator.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    # Exactly one training call followed by log streaming on the session mock.
    assert [call[0] for call in sagemaker_session.method_calls] == ["train", "logs_for_job"]
    assert [call[0] for call in sagemaker_session.boto_session.method_calls] == ["resource"]

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG
    assert sagemaker_session.method_calls[0][2] == expected_train_args

    model = estimator.create_model()
    expected_image_base = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}"
    )
    expected_container_def = {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY":
                "s3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz".format(TIMESTAMP),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": expected_image_base.format(sklearn_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    }
    assert model.prepare_container_def(CPU) == expected_container_def
    assert "cpu" in model.prepare_container_def(CPU)["Image"]

    predictor = estimator.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    """Check the estimator's training call, container definition, and deploy
    return type against the mocked SageMaker session."""
    estimator = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = 's3://mybucket/train'
    estimator.fit(inputs=inputs)

    session_calls = [call[0] for call in sagemaker_session.method_calls]
    assert session_calls == ['train', 'logs_for_job']
    boto_calls = [call[0] for call in sagemaker_session.boto_session.method_calls]
    assert boto_calls == ['resource']

    # The recorded kwargs of the 'train' call must match the expected job,
    # with only the S3 input URI substituted in.
    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] = inputs
    assert sagemaker_session.method_calls[0][2] == expected_train_args

    model = estimator.create_model()
    image_template = '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}'
    submit_dir = 's3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz'.format(TIMESTAMP)
    assert model.prepare_container_def(CPU) == {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY': submit_dir,
            'SAGEMAKER_PROGRAM': 'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        },
        'Image': image_template.format(sklearn_version, PYTHON_VERSION),
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    assert 'cpu' in model.prepare_container_def(CPU)['Image']

    predictor = estimator.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
from sagemaker.sklearn import SKLearn

# Initialise SDK. The '<...>' placeholders must be filled in by the user;
# the commented-out lines show the managed-instance (cloud) alternatives.
sklearn_estimator = SKLearn(
    entry_point='train_and_deploy.py',
    role='arn:aws:iam::<your-sagemaker-role>',
    # Swap to 'ml.m4.xlarge' to train on a managed SageMaker instance:
    # train_instance_type='ml.m4.xlarge',
    train_instance_type='local',
    output_path='s3://<path-to-output-dir>/',
    hyperparameters={
        'sagemaker_submit_directory': 's3://<path-to-sagemaker_submit_directory>'
    },
    code_location='s3://<path-to-code_location>',
    framework_version='0.20.0',
)

# Run model training job
sklearn_estimator.fit({'train': 's3://<path-to-training-data-dir>'})

# Deploy trained model to an endpoint
predictor = sklearn_estimator.deploy(
    # Swap to 'ml.t2.medium' to host on a managed SageMaker instance:
    # instance_type='ml.t2.medium',
    instance_type='local',
    initial_instance_count=1,
    endpoint_name='<your-end-point-name>',
)