def test_create_model_from_estimator(sagemaker_session, xgboost_version):
    """create_model() should carry the estimator's configuration over to the model.

    Trains (against the mocked session) and checks that session, versions,
    entry point, role, name, log level, and source dir all propagate, and
    that vpc_config defaults to None.
    """
    log_level = '"logging.INFO"'
    script_location = "s3://mybucket/source"

    estimator = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        framework_version=xgboost_version,
        container_log_level=log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=script_location,
    )

    job_name = "new_name"
    estimator.fit(inputs="s3://mybucket/train", job_name=job_name)

    model = estimator.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == xgboost_version
    assert model.py_version == estimator.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    # The model name is derived from the training job name.
    assert model.name == job_name
    assert model.container_log_level == log_level
    assert model.source_dir == script_location
    assert model.vpc_config is None
def test_create_model_with_optional_params_basic(sagemaker_session):
    """create_model() should honor role/model_server_workers/vpc_config overrides.

    NOTE(review): this function was originally named
    ``test_create_model_with_optional_params``, which is also the name of a
    later, more complete test in this module. Python module scope keeps only
    the last definition, so this test was silently shadowed and never
    collected by pytest. Renamed so it runs again.
    """
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"

    xgboost = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )
    xgboost.fit(inputs="s3://mybucket/train", job_name="new_name")

    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}

    model = xgboost.create_model(
        role=new_role,
        model_server_workers=model_server_workers,
        vpc_config_override=vpc_config,
    )

    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
def test_training_with_network_isolation(
    sagemaker_session,
    xgboost_latest_version,
    xgboost_latest_py_version,
    cpu_instance_type,
):
    """Integration test: a job launched with enable_network_isolation=True
    should report EnableNetworkIsolation in its DescribeTrainingJob output."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        base_job_name = "test-network-isolation-xgboost"

        estimator = XGBoost(
            entry_point=os.path.join(DATA_DIR, "xgboost_abalone", "abalone.py"),
            role=ROLE,
            instance_type=cpu_instance_type,
            instance_count=1,
            framework_version=xgboost_latest_version,
            py_version=xgboost_latest_py_version,
            base_job_name=base_job_name,
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )

        # Stage the abalone training data in S3 for the job to consume.
        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(DATA_DIR, "xgboost_abalone", "abalone"),
            key_prefix="integ-test-data/xgboost_abalone/abalone",
        )

        job_name = unique_name_from_base(base_job_name)
        estimator.fit(inputs={"train": train_input}, job_name=job_name)

        description = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name
        )
        assert description["EnableNetworkIsolation"]
def test_distributed_training(strftime, sagemaker_session, xgboost_framework_version):
    """Multi-instance training should issue the expected train call, and the
    resulting model should build the expected container definition."""
    estimator = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=DIST_INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=xgboost_framework_version,
    )

    train_data = "s3://mybucket/train"
    estimator.fit(inputs=train_data)

    # Only train + log streaming should hit the SageMaker API.
    session_calls = [call[0] for call in sagemaker_session.method_calls]
    assert session_calls == ["train", "logs_for_job"]

    boto_calls = [call[0] for call in sagemaker_session.boto_session.method_calls]
    assert boto_calls == ["resource"]

    expected = _create_train_job(xgboost_framework_version, DIST_INSTANCE_COUNT)
    expected["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = train_data
    actual = sagemaker_session.method_calls[0][2]
    assert actual == expected

    model = estimator.create_model()

    image_template = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:{}-cpu-{}"
    )
    submit_dir = "s3://mybucket/sagemaker-xgboost-{}/source/sourcedir.tar.gz".format(
        TIMESTAMP
    )
    container_def = model.prepare_container_def(CPU)
    assert container_def == {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": submit_dir,
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": image_template.format(xgboost_framework_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    }
    assert "cpu" in model.prepare_container_def(CPU)["Image"]

    predictor = estimator.deploy(1, CPU)
    assert isinstance(predictor, XGBoostPredictor)
def main():
    """Train, deploy, and exercise an XGBoost abalone model in local mode."""
    print('Starting model training.')
    print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')

    hyperparameters = {
        "max_depth": "5",
        "eta": "0.2",
        "gamma": "4",
        "min_child_weight": "6",
        "subsample": "0.7",
        "objective": "reg:squarederror",
        "num_round": "50",
        "verbosity": "2",
    }

    estimator = XGBoost(
        entry_point="./code/abalone.py",
        hyperparameters=hyperparameters,
        role=DUMMY_IAM_ROLE,
        instance_count=1,
        instance_type='local',
        framework_version="1.2-1",
    )

    # Same local file doubles as both train and validation channels.
    train_input = TrainingInput(
        "file://./data/train/abalone", content_type="text/libsvm"
    )
    estimator.fit({"train": train_input, "validation": train_input})
    print('Completed model training')

    artifact_location = estimator.model_data
    print(artifact_location)

    inference_model = XGBoostModel(
        model_data=artifact_location,
        role=DUMMY_IAM_ROLE,
        entry_point="./code/inference.py",
        framework_version="1.2-1",
    )

    print('Deploying endpoint in local mode')
    predictor = inference_model.deploy(
        initial_instance_count=1,
        instance_type="local",
    )

    # Two libsvm-formatted sample rows: label followed by index:value features.
    young_sample = "6 1:3 2:0.37 3:0.29 4:0.095 5:0.249 6:0.1045 7:0.058 8:0.067"
    do_inference_on_local_endpoint(predictor, young_sample)

    old_sample = "15 1:1 2:0.655 3:0.53 4:0.175 5:1.2635 6:0.486 7:0.2635 8:0.415"
    do_inference_on_local_endpoint(predictor, old_sample)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    # NOTE(review): passing the endpoint name positionally looks unusual for
    # the v2 Predictor.delete_endpoint signature — confirm against the SDK
    # version in use. Behavior preserved as-is.
    predictor.delete_endpoint(predictor.endpoint_name)
def test_xgboost_gpu(time, strftime, sagemaker_session, xgboost_gpu_framework_version):
    """GPU training should pass the experiment config through to the train call
    and produce a GPU-image container definition."""
    estimator = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=GPU_INSTANCE_TYPE,
        instance_count=1,
        framework_version=xgboost_gpu_framework_version,
    )

    train_data = "s3://mybucket/train"
    estimator.fit(inputs=train_data, experiment_config=EXPERIMENT_CONFIG)

    session_calls = [call[0] for call in sagemaker_session.method_calls]
    assert session_calls == ["train", "logs_for_job"]

    boto_calls = [call[0] for call in sagemaker_session.boto_session.method_calls]
    assert boto_calls == ["resource"]

    expected = _create_train_job(
        xgboost_gpu_framework_version, instance_type=GPU_INSTANCE_TYPE
    )
    expected["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = train_data
    expected["experiment_config"] = EXPERIMENT_CONFIG
    actual = sagemaker_session.method_calls[0][2]
    assert actual == expected

    model = estimator.create_model()
    submit_dir = "s3://mybucket/sagemaker-xgboost-{}/source/sourcedir.tar.gz".format(
        TIMESTAMP
    )
    assert model.prepare_container_def(GPU_INSTANCE_TYPE) == {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY": submit_dir,
            "SAGEMAKER_PROGRAM": "dummy_script.py",
            "SAGEMAKER_REGION": "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        },
        "Image": _get_full_image_uri(xgboost_gpu_framework_version),
        "ModelDataUrl": "s3://m/m.tar.gz",
    }

    predictor = estimator.deploy(1, GPU_INSTANCE_TYPE)
    assert isinstance(predictor, XGBoostPredictor)
def test_create_model_with_optional_params(sagemaker_session):
    """create_model() should honor every optional override: image, role,
    workers, vpc config, serving entry point, source dir, dependencies, name."""
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"

    estimator = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    # Overrides that create_model() should apply verbatim.
    overrides = dict(
        image="ubuntu:latest",
        role="role",
        model_server_workers=2,
        vpc_config_override={"Subnets": ["foo"], "SecurityGroupIds": ["bar"]},
        entry_point=SERVING_SCRIPT_FILE,
        source_dir="s3://myotherbucket/source",
        dependencies=["/directory/a", "/directory/b"],
        name="model-name",
    )
    model = estimator.create_model(**overrides)

    assert model.image == overrides["image"]
    assert model.role == overrides["role"]
    assert model.model_server_workers == overrides["model_server_workers"]
    assert model.vpc_config == overrides["vpc_config_override"]
    assert model.entry_point == SERVING_SCRIPT_FILE
    assert model.source_dir == overrides["source_dir"]
    assert model.dependencies == overrides["dependencies"]
    assert model.name == overrides["name"]
def test_create_model_with_custom_image(sagemaker_session):
    """A custom image_name on the estimator should flow to the created model."""
    log_level = '"logging.INFO"'
    script_location = "s3://mybucket/source"
    custom_image = "ubuntu:latest"

    estimator = XGBoost(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        framework_version=XGBOOST_LATEST_VERSION,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        train_instance_count=1,
        image_name=custom_image,
        container_log_level=log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=script_location,
    )
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    assert estimator.create_model().image == custom_image