def test_auto_ml_input_object_fit(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    job_name = unique_name_from_base("auto-ml", max_length=32)
    s3_input = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
    inputs = AutoMLInput(inputs=s3_input, target_attribute_name=TARGET_ATTRIBUTE_NAME)
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.fit(inputs, job_name=job_name)
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name="y",
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    job_name = unique_name_from_base("auto-ml", max_length=32)
    inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
    with pytest.raises(
        ClientError,
        match=r"An error occurred \(ValidationException\) when calling the CreateAutoMLJob "
        "operation: Target attribute name y does not exist in header.",
    ):
        auto_ml.fit(inputs, job_name=job_name)
def test_validate_and_update_inference_response_wrong_input():
    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)

    with pytest.raises(ValueError) as excinfo:
        AutoML.validate_and_update_inference_response(
            inference_containers=cic,
            inference_response_keys=["wrong_key", "wrong_label", "probabilities", "probability"],
        )

    message = (
        "Requested inference output keys [wrong_key, wrong_label] are unsupported. "
        "The supported inference keys are [probability, probabilities, predicted_label, labels]"
    )
    assert message in str(excinfo.value)
def create_auto_ml_job_if_not_exist(sagemaker_session):
    auto_ml_job_name = "python-sdk-integ-test-base-job"
    try:
        sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
    except Exception as e:  # noqa: F841
        auto_ml = AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            max_candidates=3,
        )
        inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
        with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
            auto_ml.fit(inputs, job_name=auto_ml_job_name, wait=True)
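# The helper above and the tests in this section rely on a handful of shared
# module-level constants. A minimal sketch of plausible values — the names are
# taken from the code, but the concrete values here are illustrative
# assumptions, not the real fixtures:
AUTO_ML_JOB_NAME = "python-sdk-integ-test-base-job"  # must match create_auto_ml_job_if_not_exist
ROLE = "SageMakerRole"  # assumed IAM role resolvable by the session
TARGET_ATTRIBUTE_NAME = "virginica"  # assumed label column of the iris CSV
TRAINING_DATA = "tests/data/automl/iris_training.csv"  # assumed local path
TRANSFORM_DATA = "tests/data/automl/iris_transform.csv"  # assumed local path
PREFIX = "sagemaker/automl-integ"  # assumed S3 key prefix
AUTO_ML_DEFAULT_TIMEMOUT_MINUTES = 60  # identifier kept as-is from the source (sic)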
def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
    best_candidate = auto_ml.best_candidate()

    with timeout(minutes=5):
        pipeline_model = auto_ml.create_model(
            name=DEFAULT_MODEL_NAME,
            candidate=best_candidate,
            sagemaker_session=sagemaker_session,
            vpc_config=None,
            enable_network_isolation=False,
            model_kms_key=None,
            predictor_cls=None,
        )
        inputs = sagemaker_session.upload_data(
            path=TRANSFORM_DATA, key_prefix=PREFIX + "/transform_input"
        )
        pipeline_model.transformer(
            instance_count=1,
            instance_type=cpu_instance_type,
            assemble_with="Line",
            output_path="s3://{}/{}".format(sagemaker_session.default_bucket(), "transform_test"),
            accept="text/csv",
        ).transform(data=inputs, content_type="text/csv", split_type="Line", join_source="Input")
def test_auto_ml_attach(sagemaker_session):
    expected_default_input_config = [
        {
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://{}/{}/input/iris_training.csv".format(
                        sagemaker_session.default_bucket(), PREFIX
                    ),
                }
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    expected_default_output_config = {
        "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
    }

    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    attached_automl_job = AutoML.attach(
        auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session
    )
    attached_desc = attached_automl_job.describe_auto_ml_job()
    assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
    assert attached_desc["AutoMLJobStatus"] == "Completed"
    assert isinstance(attached_desc["BestCandidate"], dict)
    assert attached_desc["InputDataConfig"] == expected_default_input_config
    assert attached_desc["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
    assert attached_desc["OutputDataConfig"] == expected_default_output_config
def test_validate_and_update_inference_response():
    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)

    AutoML.validate_and_update_inference_response(
        inference_containers=cic,
        inference_response_keys=["predicted_label", "labels", "probabilities", "probability"],
    )

    assert (
        cic[2]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"]
        == "predicted_label,labels,probabilities,probability"
    )
    assert (
        cic[2]["Environment"]["SAGEMAKER_INFERENCE_INPUT"]
        == "predicted_label,probabilities,probability"
    )
    assert (
        cic[1]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"]
        == "predicted_label,probabilities,probability"
    )
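# The two tests above mutate cic[1] and cic[2], which implies
# CLASSIFICATION_INFERENCE_CONTAINERS is a three-container inference pipeline
# (feature transform, algorithm, inverse-label transform). A sketch of a
# plausible shape for that fixture — the image URIs and model data URLs are
# illustrative assumptions, not the real fixture:
CLASSIFICATION_INFERENCE_CONTAINERS = [
    {
        "Image": "123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sklearn-automl:0.1",
        "ModelDataUrl": "s3://bucket/data-processor-model.tar.gz",
        "Environment": {"AUTOML_TRANSFORM_MODE": "feature-transform"},
    },
    {
        "Image": "123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0",
        "ModelDataUrl": "s3://bucket/algorithm-model.tar.gz",
        "Environment": {},
    },
    {
        "Image": "123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sklearn-automl:0.1",
        "ModelDataUrl": "s3://bucket/inverse-label-model.tar.gz",
        "Environment": {"AUTOML_TRANSFORM_MODE": "inverse-label-transform"},
    },
]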
def test_create_model(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    pipeline_model = auto_ml.create_model(
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        candidate=CLASSIFICATION_CANDIDATE_DICT,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=None,
        predictor_cls=None,
        inference_response_keys=None,
    )
    assert isinstance(pipeline_model, PipelineModel)
def test_attach(sagemaker_session):
    aml = AutoML.attach(auto_ml_job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
    assert aml.current_job_name == JOB_NAME_3
    assert aml.role == "mock_role_arn"
    assert aml.target_attribute_name == "y"
    assert aml.problem_type == "Auto"
    assert aml.output_path == "s3://output_prefix"
    assert aml.tags == LIST_TAGS_RESULT["Tags"]
def test_auto_ml_input(sagemaker_session):
    inputs = AutoMLInput(
        inputs=DEFAULT_S3_INPUT_DATA, target_attribute_name="target", compression="Gzip"
    )
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    auto_ml.fit(inputs)
    _, args = sagemaker_session.auto_ml.call_args
    assert args["input_config"] == [
        {
            "CompressionType": "Gzip",
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_mock):
    candidate_estimator.return_value = candidate_mock
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        candidate=CANDIDATE_DICT,
        sagemaker_session=sagemaker_session,
        name=JOB_NAME,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        update_endpoint=True,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=RealTimePredictor,
    )

    auto_ml._deploy_inference_pipeline.assert_called_once()
    auto_ml._deploy_inference_pipeline.assert_called_with(
        candidate_mock.containers,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        update_endpoint=True,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=RealTimePredictor,
    )
    candidate_estimator.assert_called_with(CANDIDATE_DICT, sagemaker_session=sagemaker_session)
def test_auto_ml_default_fit(strftime, sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    inputs = DEFAULT_S3_INPUT_DATA
    auto_ml.fit(inputs)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args
    assert args == {
        "input_config": [
            {
                "DataSource": {
                    "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
                },
                "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
            }
        ],
        "output_config": {"S3OutputPath": DEFAULT_OUTPUT_PATH},
        "auto_ml_job_config": {
            "CompletionCriteria": {"MaxCandidates": DEFAULT_MAX_CANDIDATES},
            "SecurityConfig": {
                "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC
            },
        },
        "role": ROLE,
        "job_name": DEFAULT_JOB_NAME,
        "problem_type": None,
        "job_objective": None,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": None,
    }
def test_auto_ml_invalid_input_data_format(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    inputs = {}
    expected_error_msg = "Cannot format input {}. Expecting one of str or list of strings."
    # pytest.raises no longer accepts a `message` kwarg; assert on the captured
    # exception text instead, as the other tests in this section do.
    with pytest.raises(ValueError) as excinfo:
        AutoMLJob.start_new(auto_ml, inputs)
    assert expected_error_msg.format(inputs) in str(excinfo.value)
    sagemaker_session.auto_ml.assert_not_called()
def get_candidates(top_n_candidates, job_name):
    # Takes an Autopilot job name; returns the attached AutoML estimator and
    # its top candidates, sorted by final objective metric value (descending).
    est = AutoML.attach(auto_ml_job_name=job_name)
    candidates = est.list_candidates(
        sort_by="FinalObjectiveMetricValue",
        sort_order="Descending",
        max_results=top_n_candidates,
    )
    return est, candidates
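# A minimal usage sketch for get_candidates; the job name here is an
# illustrative assumption, not a real job. Each candidate is a dict following
# the ListCandidatesForAutoMLJob response shape:
est, candidates = get_candidates(top_n_candidates=3, job_name="my-autopilot-job")
for candidate in candidates:
    print(
        candidate["CandidateName"],
        candidate["FinalAutoMLJobObjectiveMetric"]["Value"],
    )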
def test_deploy_best_candidate(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
    endpoint_name = unique_name_from_base("sagemaker-auto-ml-best-candidate-test")

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.deploy(
            candidate=best_candidate,
            initial_instance_count=INSTANCE_COUNT,
            instance_type=HOSTING_INSTANCE_TYPE,
            endpoint_name=endpoint_name,
        )

    endpoint_status = sagemaker_session.sagemaker_client.describe_endpoint(
        EndpointName=endpoint_name
    )["EndpointStatus"]
    assert endpoint_status == "InService"
    sagemaker_session.sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
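# The test above only checks endpoint status. A sketch of how the deployed
# endpoint could be invoked with a CSV row before deletion, assuming the v2
# Predictor API and an iris-style feature row (the payload is illustrative):
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

def invoke_best_candidate_endpoint(endpoint_name, sagemaker_session):
    predictor = Predictor(
        endpoint_name=endpoint_name,
        sagemaker_session=sagemaker_session,
        serializer=CSVSerializer(),
    )
    # With no deserializer configured, predict() returns the raw response bytes.
    return predictor.predict("5.1,3.5,1.4,0.2")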
def test_deploy(sagemaker_session, candidate_mock):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )

    auto_ml._deploy_inference_pipeline.assert_called_once()
    auto_ml._deploy_inference_pipeline.assert_called_with(
        candidate_mock.containers,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        name=None,
        sagemaker_session=sagemaker_session,
        endpoint_name=None,
        tags=None,
        wait=True,
        update_endpoint=False,
        vpc_config=None,
        enable_network_isolation=False,
        model_kms_key=None,
        predictor_cls=None,
    )
def test_auto_ml_only_one_of_problem_type_and_job_objective_provided(sagemaker_session):
    # pytest.raises no longer accepts a `message` kwarg; match on the error text.
    with pytest.raises(
        ValueError,
        match="One of problem type and objective metric provided. "
        "Either both of them should be provided or none of them should be provided.",
    ):
        AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            problem_type=PROBLEM_TYPE,
        )
def test_auto_ml_fit_optional_args(sagemaker_session):
    output_path = "s3://{}/{}".format(sagemaker_session.default_bucket(), "specified_output_path")
    problem_type = "MulticlassClassification"
    job_objective = {"MetricName": "Accuracy"}
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
        output_path=output_path,
        problem_type=problem_type,
        job_objective=job_objective,
    )
    inputs = TRAINING_DATA
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.fit(inputs, job_name=unique_name_from_base(BASE_JOB_NAME))

    auto_ml_desc = auto_ml.describe_auto_ml_job(job_name=auto_ml.latest_auto_ml_job.job_name)
    assert auto_ml_desc["AutoMLJobStatus"] == "Completed"
    assert auto_ml_desc["AutoMLJobName"] == auto_ml.latest_auto_ml_job.job_name
    assert auto_ml_desc["AutoMLJobObjective"] == job_objective
    assert auto_ml_desc["ProblemType"] == problem_type
    assert auto_ml_desc["OutputDataConfig"]["S3OutputPath"] == output_path
def test_auto_ml_only_one_of_problem_type_and_job_objective_provided(sagemaker_session):
    with pytest.raises(ValueError) as excinfo:
        AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            problem_type=PROBLEM_TYPE,
        )

    message = (
        "One of problem type and objective metric provided. Either both of them "
        "should be provided or none of them should be provided."
    )
    assert message in str(excinfo.value)
def test_list_candidates_with_optional_args(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    auto_ml.list_candidates(
        job_name=JOB_NAME,
        status_equals="Completed",
        candidate_name="candidate-name",
        candidate_arn="candidate-arn",
        sort_order="Ascending",
        sort_by="Status",
        max_results=99,
    )
    sagemaker_session.list_candidates.assert_called_once()
    _, args = sagemaker_session.list_candidates.call_args
    assert args == {
        "job_name": JOB_NAME,
        "status_equals": "Completed",
        "candidate_name": "candidate-name",
        "candidate_arn": "candidate-arn",
        "sort_order": "Ascending",
        "sort_by": "Status",
        "max_results": 99,
    }
def test_auto_ml_default_channel_name(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    inputs = DEFAULT_S3_INPUT_DATA
    AutoMLJob.start_new(auto_ml, inputs)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args
    assert args["input_config"] == [
        {
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_mock):
    candidate_estimator.return_value = candidate_mock
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    mock_pipeline = Mock(name="pipeline_model")
    mock_pipeline.deploy = Mock(name="model_deploy")
    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        candidate=CANDIDATE_DICT,
        sagemaker_session=sagemaker_session,
        name=JOB_NAME,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=Predictor,
        inference_response_keys=None,
    )

    auto_ml.create_model.assert_called_once()
    auto_ml.create_model.assert_called_with(
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        candidate=CANDIDATE_DICT,
        inference_response_keys=None,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=Predictor,
    )
    mock_pipeline.deploy.assert_called_once()
    mock_pipeline.deploy.assert_called_with(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        serializer=None,
        deserializer=None,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
    )
def test_deploy(sagemaker_session, candidate_mock):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    mock_pipeline = Mock(name="pipeline_model")
    mock_pipeline.deploy = Mock(name="model_deploy")
    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    auto_ml.create_model.assert_called_once()
    mock_pipeline.deploy.assert_called_once()
def lambda_handler(event, context):
    try:
        model_config = event["Input"]["taskresult"]["Payload"]["model-config"]
        automl_config = event["Input"]["taskresult"]["Payload"]["automl-config"]
        security_config = event["Input"]["taskresult"]["Payload"]["security-config"]

        session = Session()
        automl_job = AutoML.attach(automl_config["job_name"], sagemaker_session=session)
        model = automl_job.create_model(
            model_config["model_name"],
            inference_response_keys=model_config["inference_response_keys"],
        )
        # Have the first (data-transform) container sparsely encode its
        # RecordIO-protobuf output.
        model.models[0].env["AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF"] = "1"
        session.create_model(
            name=model_config["model_name"],
            role=security_config["iam_role"],
            container_defs=model.pipeline_container_def(model_config["instance_type"]),
        )
    except KeyError as e:
        raise KeyError(f"KeyError on input: {event}") from e
    return event["Input"]["taskresult"]["Payload"]
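# A sketch of the event shape lambda_handler expects, inferred from the key
# lookups above; all concrete values are illustrative assumptions:
example_event = {
    "Input": {
        "taskresult": {
            "Payload": {
                "model-config": {
                    "model_name": "automl-best-model",
                    "inference_response_keys": ["predicted_label", "probability"],
                    "instance_type": "ml.m5.xlarge",
                },
                "automl-config": {"job_name": "automl-example-job"},
                "security-config": {"iam_role": "arn:aws:iam::123456789012:role/ExampleRole"},
            }
        }
    }
}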
def test_auto_ml_additional_optional_params(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        volume_kms_key=VOLUME_KMS_KEY,
        vpc_config=VPC_CONFIG,
        encrypt_inter_container_traffic=ENCRYPT_INTER_CONTAINER_TRAFFIC,
        compression_type=COMPRESSION_TYPE,
        output_kms_key=OUTPUT_KMS_KEY,
        output_path=OUTPUT_PATH,
        problem_type=PROBLEM_TYPE,
        max_candidates=MAX_CANDIDATES,
        max_runtime_per_training_job_in_seconds=MAX_RUNTIME_PER_TRAINING_JOB,
        total_job_runtime_in_seconds=TOTAL_JOB_RUNTIME,
        job_objective=JOB_OBJECTIVE,
        generate_candidate_definitions_only=GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        tags=TAGS,
    )
    inputs = DEFAULT_S3_INPUT_DATA
    auto_ml.fit(inputs, job_name=JOB_NAME)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args
    assert args == {
        "input_config": [
            {
                "CompressionType": COMPRESSION_TYPE,
                "DataSource": {
                    "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
                },
                "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
            }
        ],
        "output_config": {"S3OutputPath": OUTPUT_PATH, "KmsKeyId": OUTPUT_KMS_KEY},
        "auto_ml_job_config": {
            "CompletionCriteria": {
                "MaxAutoMLJobRuntimeInSeconds": TOTAL_JOB_RUNTIME,
                "MaxCandidates": MAX_CANDIDATES,
                "MaxRuntimePerTrainingJobInSeconds": MAX_RUNTIME_PER_TRAINING_JOB,
            },
            "SecurityConfig": {
                "VolumeKmsKeyId": VOLUME_KMS_KEY,
                "VpcConfig": VPC_CONFIG,
                "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC,
            },
        },
        "job_name": JOB_NAME,
        "role": ROLE,
        "job_objective": JOB_OBJECTIVE,
        "problem_type": PROBLEM_TYPE,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": TAGS,
    }