def test_fail_distributed_training(sagemaker_session, sklearn_version):
    with pytest.raises(AttributeError) as error:
        SKLearn(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_count=DIST_INSTANCE_COUNT,
            train_instance_type=INSTANCE_TYPE,
            py_version=PYTHON_VERSION,
            framework_version=sklearn_version,
        )
    assert "Scikit-Learn does not support distributed training." in str(error)
def test_training_with_additional_hyperparameters(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'sklearn_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'sklearn_mnist')

        sklearn = SKLearn(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_type="ml.c4.xlarge",
                          framework_version=sklearn_full_version,
                          py_version=PYTHON_VERSION,
                          sagemaker_session=sagemaker_session,
                          hyperparameters={'epochs': 1})

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/sklearn_mnist/train')
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/sklearn_mnist/test')

        job_name = unique_name_from_base('test-sklearn-hp')
        sklearn.fit({'train': train_input, 'test': test_input}, job_name=job_name)
        return sklearn.latest_training_job.name
def test_attach_deploy(sklearn_training_job, sagemaker_session, cpu_instance_type):
    endpoint_name = "test-sklearn-attach-deploy-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = SKLearn.attach(sklearn_training_job, sagemaker_session=sagemaker_session)
        predictor = estimator.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        _predict_and_assert(predictor)
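# Several tests in this section call a shared helper, _predict_and_assert, that
# is not shown here. A minimal sketch of what such a helper might look like,
# assuming an MNIST-shaped input; the batch size and zero-valued payload are
# illustrative assumptions, not the SDK's actual test helper:
import numpy as np

def _predict_and_assert(predictor):
    batch_size = 100  # assumed number of rows in the smoke-test payload
    data = np.zeros((batch_size, 784), dtype="float32")  # 28x28 images, flattened
    output = predictor.predict(data)
    assert len(output) == batch_size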
def main(args):
    print("args.local=", args.local)

    # Initialise SDK
    sklearn_estimator = SKLearn(
        entry_point='src/train_and_deploy.py',
        role=CLOUD_CONFIG['sagemaker_role_id']['value'],
        train_instance_type='local' if args.local else 'ml.m4.xlarge',
        hyperparameters={
            'sagemaker_submit_directory': f"s3://{CLOUD_CONFIG['s3bucket']['value']}",
        },
        framework_version='0.23-1',
        metric_definitions=[{
            'Name': 'train:score',
            'Regex': r'train:score=(\S+)',  # raw string avoids an invalid-escape warning
        }],
    )

    # Run model training job
    sklearn_estimator.fit({
        'train': "file://./data/data.csv" if args.local
        else f"s3://{CLOUD_CONFIG['s3bucket']['value']}/data.csv"
    })

    # Deploy trained model to an endpoint
    sklearn_estimator.deploy(
        instance_type='local' if args.local else 'ml.t2.medium',
        initial_instance_count=1,
        endpoint_name='demo-endpoint',
    )
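# main() above expects an `args` namespace carrying a boolean `local` flag. A
# minimal sketch of the script entry point that would supply it; the flag name
# comes from the code above, everything else is an assumption:
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--local', action='store_true',
                        help='train and deploy in SageMaker local mode')
    main(parser.parse_args())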
def test_failed_training_job(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "failure_script.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_full_version,
            py_version=PYTHON_VERSION,
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train"
        )

        job_name = unique_name_from_base("test-sklearn-failed")
        with pytest.raises(ValueError):
            sklearn.fit(train_input, job_name=job_name)
def _sklearn_estimator(
    sagemaker_session, framework_version, instance_type=None, base_job_name=None, **kwargs
):
    return SKLearn(
        entry_point=SCRIPT_PATH,
        framework_version=framework_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=instance_type if instance_type else INSTANCE_TYPE,
        base_job_name=base_job_name,
        py_version=PYTHON_VERSION,
        **kwargs
    )
def test_estimator_py2_warning(warning, sagemaker_session):
    estimator = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        py_version="py2",
    )

    assert estimator.py_version == "py2"
    warning.assert_called_with(estimator.__framework_name__, defaults.LATEST_PY2_VERSION)
def test_estimator_throws_error_for_unsupported_version(error, sagemaker_session):
    # Assert the premise up front: once the constructor raises, nothing after
    # it inside the pytest.raises block would execute.
    unsupported_version = "foo"
    assert unsupported_version not in defaults.SKLEARN_SUPPORTED_VERSIONS

    with pytest.raises(ValueError):
        SKLearn(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_count=INSTANCE_COUNT,
            train_instance_type=INSTANCE_TYPE,
            framework_version=unsupported_version,
        )

    error.assert_called_with(
        defaults.SKLEARN_NAME, unsupported_version, defaults.SKLEARN_SUPPORTED_VERSIONS
    )
def test_create_model_from_estimator(name_from_base, sagemaker_session, sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    base_job_name = "job"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        framework_version=sklearn_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name=base_job_name,
        source_dir=source_dir,
        enable_network_isolation=True,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    model_name = "model_name"
    name_from_base.return_value = model_name
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == model_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
    assert model.enable_network_isolation()

    name_from_base.assert_called_with(base_job_name)
def test_training_with_network_isolation(
    sagemaker_session,
    sklearn_latest_version,
    sklearn_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
            enable_network_isolation=True,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")

        job_name = unique_name_from_base("test-sklearn-hp")
        sklearn.fit({"train": train_input, "test": test_input}, job_name=job_name)
        assert sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name)["EnableNetworkIsolation"]
def test_async_fit(sagemaker_session):
    endpoint_name = 'test-sklearn-attach-deploy-{}'.format(sagemaker_timestamp())

    with timeout(minutes=5):
        # Keyword arguments match the _run_mnist_training_job signature below
        training_job_name = _run_mnist_training_job(
            sagemaker_session, "ml.c4.xlarge",
            sklearn_version=SKLEARN_VERSION, py_version=PYTHON_VERSION, wait=False)

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        print("Re-attaching now to: %s" % training_job_name)
        estimator = SKLearn.attach(training_job_name=training_job_name,
                                   sagemaker_session=sagemaker_session)
        predictor = estimator.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name)
        _predict_and_assert(predictor)
def _run_mnist_training_job(sagemaker_session, instance_type, sklearn_version, py_version, wait=True):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_version,
            py_version=py_version,
            instance_type=instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")

        job_name = unique_name_from_base("test-sklearn-mnist")
        sklearn.fit({"train": train_input, "test": test_input}, wait=wait, job_name=job_name)
        return sklearn.latest_training_job.name
def test_attach(sagemaker_session, sklearn_version):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}".format(
        sklearn_version, PYTHON_VERSION
    )
    returned_job_description = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description
    )

    estimator = SKLearn.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert estimator._current_job_name == "neo"
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == PYTHON_VERSION
    assert estimator.framework_version == sklearn_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"
    sklearn.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]

    boto_call_names = [c[0] for c in sagemaker_session.boto_session.method_calls]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}"
    )
    assert {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz".format(
                TIMESTAMP
            ),
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": expected_image_base.format(sklearn_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    } == model.prepare_container_def(CPU)

    assert "cpu" in model.prepare_container_def(CPU)["Image"]

    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
def test_attach_custom_image(sagemaker_session):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/my_custom_sklearn_image:latest"
    returned_job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image,
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "sagemaker_region": '"us-west-2"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description)

    estimator = SKLearn.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
def test_sklearn_airflow_config_uploads_data_source_to_s3(
    sagemaker_session,
    cpu_instance_type,
    sklearn_latest_version,
    sklearn_latest_py_version,
):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role=ROLE,
            instance_type=cpu_instance_type,
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")

        training_config = _build_airflow_workflow(
            estimator=sklearn,
            instance_type=cpu_instance_type,
            inputs={"train": train_input, "test": test_input},
        )

        _assert_that_s3_url_contains_data(
            sagemaker_session,
            training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
        )
inters_df.consultant.portfolio = sub_port(
    consultant_processing(list(inters_df.consultant)).portfolio)
inters_df.consultant = cons_predictor(
    consultant_processing(list(inters_df.consultant)))
inters_df = pd.concat([
    inters_df.drop(["client", "duration", "ongoing", "n_transactions"], axis=1),
    client_processing(list(inters_df.client))
], axis=1)
inters_df.to_csv(key + "interactions.csv")
upload_file(key + "interactions.csv")

# Train and deploy one model per consultant
models = {}
for name, df in inters_df.groupby("consultant"):
    model = SKLearn(entry_point="training_scripts.py",
                    train_instance_type="ml.c4.xlarge",
                    role=role,
                    sagemaker_session=sagemaker_session,
                    hyperparameters={"normalize": True})
    # fit() returns None, so deploy from the estimator itself rather than
    # from the return value of fit()
    model.fit({"train": df})
    models[name] = model.deploy(initial_instance_count=1,
                                instance_type="ml.m4.xlarge")
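# A hedged sketch of querying one of the per-consultant endpoints created in
# the loop above; the consultant key and feature row are hypothetical:
sample_features = [[52.0, 3, 1]]  # hypothetical client feature vector
prediction = models["consultant_a"].predict(sample_features)
print(prediction)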
def get_pipeline(
    region,
    sagemaker_session,
    role=None,
    default_bucket=None,
    model_package_group_name="sts-sklearn-grp",
    pipeline_name="stsPipeline",
    base_job_prefix="sts",
) -> Pipeline:
    """Gets a SageMaker ML Pipeline instance working with STS data.

    Args:
        region: AWS region to create and run the pipeline.
        sagemaker_session: session (re-created internally via get_session).
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts.
        model_package_group_name: model package group used at registration.
        pipeline_name: name of the pipeline.
        base_job_prefix: prefix for the jobs created by the pipeline.

    Returns:
        An instance of a pipeline.
    """
    # Instance types allowed: ml.r5.12xlarge, ml.m5.4xlarge, ml.p2.xlarge,
    # ml.m4.16xlarge, ml.r5.24xlarge, ml.t3.xlarge, ml.r5.16xlarge, ml.m5.large,
    # ml.p3.16xlarge, ml.p2.16xlarge, ml.c4.2xlarge, ml.c5.2xlarge, ml.c4.4xlarge,
    # ml.c5.4xlarge, ml.c4.8xlarge, ml.c5.9xlarge, ml.c5.xlarge, ml.c4.xlarge,
    # ml.t3.2xlarge, ml.t3.medium, ml.c5.18xlarge, ml.r5.2xlarge, ml.p3.2xlarge,
    # ml.m5.xlarge, ml.m4.10xlarge, ml.r5.4xlarge, ml.m5.12xlarge, ml.m4.xlarge,
    # ml.t3.large, ml.m5.24xlarge, ml.m4.2xlarge, ml.m5.2xlarge, ml.p2.8xlarge,
    # ml.r5.8xlarge, ml.r5.xlarge, ml.r5.large, ml.p3.8xlarge, ml.m4.4xlarge.
    # See https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    # free tier covers 50 hours of m4.xlarge or m5.xlarge instances
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge")
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="Approved")

    # preprocess input data
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://sts-datwit-dataset/stsmsrpc.txt",
    )

    # processing step for feature engineering
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-sts-preprocess",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_preprocess = ProcessingStep(
        name="PreprocessSTSData",
        processor=sklearn_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/stsTrain"
    image_uri = sagemaker.image_uris.retrieve(
        framework="sklearn",
        region=region,
        version="0.23-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    sklearn_estimator = SKLearn(
        entry_point="training.py",
        source_dir=BASE_DIR,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        framework_version="0.23-1",
        py_version="py3",
        base_job_name=f"{base_job_prefix}/sts-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_train = TrainingStep(
        name="TrainSTSModel",
        estimator=sklearn_estimator,
        inputs={
            "train": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation
    script_eval = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-sts-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    evaluation_report = PropertyFile(
        name="stsEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateSTSModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "evaluate.py"),
        property_files=[evaluation_report],
    )

    # set up model quality monitoring baseline data
    script_process_baseline_data = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/baseline",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process_baseline_data = ProcessingStep(
        name="SetupMonitoringData",
        processor=script_process_baseline_data,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_preprocess.properties.ProcessingOutputConfig.Outputs[
                    "validation"].S3Output.S3Uri,
                destination="/opt/ml/processing/validation",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="validate", source="/opt/ml/processing/validate"),
        ],
        code=os.path.join(BASE_DIR, "baseline.py"),
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]
                ["S3Output"]["S3Uri"]),
            content_type="application/json",
        )
    )
    step_register = RegisterModel(
        name="RegisterSTSModel",
        estimator=sklearn_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step=step_eval,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSESTSEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register, step_process_baseline_data],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_preprocess, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
from sagemaker.sklearn import SKLearn

# Initialise SDK
sklearn_estimator = SKLearn(
    entry_point='train_and_deploy.py',
    role='arn:aws:iam::<your-sagemaker-role>',
    # train_instance_type='ml.m4.xlarge',
    train_instance_type='local',
    output_path='s3://<path-to-output-dir>/',
    hyperparameters={
        'sagemaker_submit_directory': 's3://<path-to-sagemaker_submit_directory>'
    },
    code_location='s3://<path-to-code_location>',
    framework_version='0.20.0')

# Run model training job
sklearn_estimator.fit({'train': 's3://<path-to-training-data-dir>'})

# Deploy trained model to an endpoint
predictor = sklearn_estimator.deploy(
    # instance_type='ml.t2.medium',
    instance_type='local',
    initial_instance_count=1,
    endpoint_name='<your-end-point-name>',
)
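# Once deployed, the endpoint can be invoked through the returned predictor and
# torn down when finished. A minimal sketch; the feature-vector shape is an
# illustrative assumption:
sample = [[0.1, 0.2, 0.3, 0.4]]  # hypothetical feature row
print(predictor.predict(sample))

# Delete the endpoint so it stops accruing charges (local mode simply stops
# the container)
predictor.delete_endpoint()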