def test_auto_ml_fit_optional_args(sagemaker_session):
    """Fit with optional arguments and check that the job description echoes them.

    The estimator is built with an explicit output path (inside the session's
    default bucket), problem type and job objective, limited to one candidate
    and with ``generate_candidate_definitions_only=True``. After ``fit`` the
    description of the latest job must report status ``Completed`` together
    with the supplied name, objective, problem type and S3 output path.
    """
    bucket = sagemaker_session.default_bucket()
    expected_output_path = "s3://{}/{}".format(bucket, "specified_ouput_path")
    expected_problem_type = "MulticlassClassification"
    expected_objective = {"MetricName": "Accuracy"}

    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
        output_path=expected_output_path,
        problem_type=expected_problem_type,
        job_objective=expected_objective,
        generate_candidate_definitions_only=True,
    )

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        automl.fit(TRAINING_DATA, job_name=unique_name_from_base(BASE_JOB_NAME))

    # Look the job up by the name the estimator recorded for its latest job.
    description = automl.describe_auto_ml_job(job_name=automl.latest_auto_ml_job.job_name)

    assert description["AutoMLJobStatus"] == "Completed"
    assert description["AutoMLJobName"] == automl.latest_auto_ml_job.job_name
    assert description["AutoMLJobObjective"] == expected_objective
    assert description["ProblemType"] == expected_problem_type
    assert description["OutputDataConfig"]["S3OutputPath"] == expected_output_path
def test_auto_ml_default_fit(strftime, sagemaker_session):
    """``fit`` with only required arguments forwards the default request to the session.

    ``strftime`` is not referenced in the body; presumably it is a patched
    clock that keeps ``DEFAULT_JOB_NAME`` deterministic -- confirm against the
    fixture/decorator that supplies it.
    """
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    automl.fit(DEFAULT_S3_INPUT_DATA)
    sagemaker_session.auto_ml.assert_called_once()

    # Expected keyword arguments, assembled piece by piece for readability.
    expected_channel = {
        "DataSource": {
            "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
        },
        "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
    }
    expected_job_config = {
        "CompletionCriteria": {"MaxCandidates": DEFAULT_MAX_CANDIDATES},
        "SecurityConfig": {
            "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC
        },
    }
    expected_kwargs = {
        "input_config": [expected_channel],
        "output_config": {"S3OutputPath": DEFAULT_OUTPUT_PATH},
        "auto_ml_job_config": expected_job_config,
        "role": ROLE,
        "job_name": DEFAULT_JOB_NAME,
        "problem_type": None,
        "job_objective": None,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": None,
    }

    _, call_kwargs = sagemaker_session.auto_ml.call_args
    assert call_kwargs == expected_kwargs
def test_auto_ml_fit_set_logs_to_false(start_new, sagemaker_session, caplog):
    """``fit(wait=False, logs=True)`` must not wait and must log that logs were disabled."""
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )

    automl.fit(DEFAULT_S3_INPUT_DATA, job_name=JOB_NAME, wait=False, logs=True)

    start_new.wait.assert_not_called()
    expected_message = "Setting logs to False. logs is only meaningful when wait is True."
    assert expected_message in caplog.text
def test_auto_ml_local_input(sagemaker_session):
    """The S3 URI handed to ``fit`` ends up in the first input channel of the request."""
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    automl.fit(DEFAULT_S3_INPUT_DATA)
    sagemaker_session.auto_ml.assert_called_once()

    _, call_kwargs = sagemaker_session.auto_ml.call_args
    first_channel = call_kwargs["input_config"][0]
    assert first_channel["DataSource"]["S3DataSource"]["S3Uri"] == DEFAULT_S3_INPUT_DATA
def test_auto_ml_additional_optional_params(sagemaker_session):
    """All optional constructor arguments are forwarded to ``sagemaker_session.auto_ml``."""
    optional_settings = dict(
        volume_kms_key=VOLUME_KMS_KEY,
        vpc_config=VPC_CONFIG,
        encrypt_inter_container_traffic=ENCRYPT_INTER_CONTAINER_TRAFFIC,
        compression_type=COMPRESSION_TYPE,
        output_kms_key=OUTPUT_KMS_KEY,
        output_path=OUTPUT_PATH,
        problem_type=PROBLEM_TYPE,
        max_candidates=MAX_CANDIDATES,
        max_runtime_per_training_job_in_seconds=MAX_RUNTIME_PER_TRAINING_JOB,
        total_job_runtime_in_seconds=TOTAL_JOB_RUNTIME,
        job_objective=JOB_OBJECTIVE,
        generate_candidate_definitions_only=GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        tags=TAGS,
    )
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        **optional_settings
    )

    automl.fit(DEFAULT_S3_INPUT_DATA, job_name=JOB_NAME)
    sagemaker_session.auto_ml.assert_called_once()

    # Expected request, split into its three nested sections.
    expected_channel = {
        "CompressionType": COMPRESSION_TYPE,
        "DataSource": {
            "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
        },
        "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
    }
    expected_completion = {
        "MaxAutoMLJobRuntimeInSeconds": TOTAL_JOB_RUNTIME,
        "MaxCandidates": MAX_CANDIDATES,
        "MaxRuntimePerTrainingJobInSeconds": MAX_RUNTIME_PER_TRAINING_JOB,
    }
    expected_security = {
        "VolumeKmsKeyId": VOLUME_KMS_KEY,
        "VpcConfig": VPC_CONFIG,
        "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC,
    }
    expected_kwargs = {
        "input_config": [expected_channel],
        "output_config": {"S3OutputPath": OUTPUT_PATH, "KmsKeyId": OUTPUT_KMS_KEY},
        "auto_ml_job_config": {
            "CompletionCriteria": expected_completion,
            "SecurityConfig": expected_security,
        },
        "job_name": JOB_NAME,
        "role": ROLE,
        "job_objective": JOB_OBJECTIVE,
        "problem_type": PROBLEM_TYPE,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": TAGS,
    }

    _, call_kwargs = sagemaker_session.auto_ml.call_args
    assert call_kwargs == expected_kwargs
def test_auto_ml_fit_local_input(sagemaker_session):
    """Fit a single-candidate AutoML job directly from ``TRAINING_DATA``.

    No job name is passed to ``fit``. The call runs inside the ``timeout``
    context, and the test passes as long as ``fit`` does not raise.
    """
    settings = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 1,
    }
    automl = AutoML(**settings)

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        automl.fit(TRAINING_DATA)
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    """Fitting with target attribute ``"y"`` must raise ``UnexpectedStatusException``.

    The training data is uploaded through the session first; ``fit`` on the
    returned location is then expected to fail with the data-builder message.
    """
    automl = AutoML(
        role=ROLE,
        target_attribute_name="y",
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    uploaded_uri = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )

    # ``match`` is applied as a regular expression by pytest.
    expected_error = "Could not complete the data builder processing job."
    with pytest.raises(UnexpectedStatusException, match=expected_error):
        automl.fit(uploaded_uri)
def test_auto_ml_fit_local_input(sagemaker_session):
    """Fit a single-candidate AutoML job from ``TRAINING_DATA`` under a unique job name.

    The name is generated with ``unique_name_from_base`` capped at 32
    characters. The test passes as long as ``fit`` does not raise inside the
    ``timeout`` context.
    """
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    unique_job_name = unique_name_from_base("auto-ml", max_length=32)

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        automl.fit(TRAINING_DATA, job_name=unique_job_name)
def test_auto_ml_input_object_fit(sagemaker_session):
    """Fit using an ``AutoMLInput`` object that wraps freshly uploaded training data."""
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    unique_job_name = unique_name_from_base("auto-ml", max_length=32)

    # Upload first, then describe the uploaded location as an AutoML input channel.
    uploaded_uri = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )
    channel = AutoMLInput(inputs=uploaded_uri, target_attribute_name=TARGET_ATTRIBUTE_NAME)

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        automl.fit(channel, job_name=unique_job_name)
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    """Fitting with target attribute ``"y"`` must raise a ``ClientError`` validation failure.

    The expected message is matched as a regular expression, which is why the
    parentheses around ``ValidationException`` are escaped.
    """
    automl = AutoML(
        role=ROLE,
        target_attribute_name="y",
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    unique_job_name = unique_name_from_base("auto-ml", max_length=32)
    uploaded_uri = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )

    expected_error = (
        r"An error occurred \(ValidationException\) when calling the CreateAutoMLJob "
        "operation: Target attribute name y does not exist in header."
    )
    with pytest.raises(ClientError, match=expected_error):
        automl.fit(uploaded_uri, job_name=unique_job_name)
def test_auto_ml_fit(sagemaker_session):
    """Upload the training data and fit an AutoML job limited to three candidates.

    The test passes as long as ``fit`` does not raise inside the ``timeout``
    context.
    """
    settings = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 3,
    }
    automl = AutoML(**settings)

    uploaded_uri = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        automl.fit(uploaded_uri)
def create_auto_ml_job_if_not_exist(sagemaker_session):
    """Make sure the shared base AutoML job exists, creating it if the lookup fails.

    The job ``python-sdk-integ-test-base-job`` is looked up through
    ``sagemaker_session.describe_auto_ml_job``. If that call raises, the
    training data is uploaded and a three-candidate job with that name is
    fitted, waiting for completion inside the ``timeout`` context.

    Args:
        sagemaker_session: Session used for the lookup, the data upload and
            the AutoML job itself.
    """
    auto_ml_job_name = "python-sdk-integ-test-base-job"
    try:
        sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
    except Exception:
        # Deliberately broad: any failure to describe the job is treated as
        # "job does not exist yet". The exception object itself is not needed.
        auto_ml = AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            max_candidates=3,
        )
        inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
        with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
            auto_ml.fit(inputs, job_name=auto_ml_job_name, wait=True)
def test_auto_ml_input(sagemaker_session):
    """An ``AutoMLInput`` with Gzip compression is translated into the request's input config."""
    channel = AutoMLInput(
        inputs=DEFAULT_S3_INPUT_DATA,
        target_attribute_name="target",
        compression="Gzip",
    )
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    automl.fit(channel)

    expected_channel = {
        "CompressionType": "Gzip",
        "DataSource": {
            "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
        },
        "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
    }
    _, call_kwargs = sagemaker_session.auto_ml.call_args
    assert call_kwargs["input_config"] == [expected_channel]
def test_auto_ml_fit_optional_args(sagemaker_session):
    """Fit with optional arguments and check that the job description echoes them.

    The output path is placed in the session's default bucket rather than a
    bucket hard-coded to one region and account, so the test can run wherever
    the session points. The job name is generated per run because a fixed name
    collides with the job left behind by a previous run.
    """
    output_path = "s3://{}/{}".format(sagemaker_session.default_bucket(), "specified_ouput_path")
    problem_type = "MulticlassClassification"
    job_objective = {"MetricName": "Accuracy"}
    # Same naming scheme as the other fit tests: unique, capped at 32 characters.
    job_name = unique_name_from_base("auto-ml", max_length=32)

    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
        output_path=output_path,
        problem_type=problem_type,
        job_objective=job_objective,
    )
    inputs = TRAINING_DATA
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.fit(inputs, job_name=job_name)

    auto_ml_desc = auto_ml.describe_auto_ml_job(job_name=job_name)
    assert auto_ml_desc["AutoMLJobStatus"] == "Completed"
    assert auto_ml_desc["AutoMLJobName"] == job_name
    assert auto_ml_desc["AutoMLJobObjective"] == job_objective
    assert auto_ml_desc["ProblemType"] == problem_type
    assert auto_ml_desc["OutputDataConfig"]["S3OutputPath"] == output_path