Ejemplo n.º 1
0
def test_auto_ml_fit_optional_args(sagemaker_session):
    """Fit an AutoML job with optional arguments and verify its description.

    The job is configured with an explicit output path (built from the
    session's default bucket), a problem type, a job objective and
    ``generate_candidate_definitions_only=True``. Once ``fit`` returns, the
    description of the latest job must report ``Completed`` and echo back the
    configured name, objective, problem type and output path.
    """
    bucket = sagemaker_session.default_bucket()
    output_path = "s3://{}/{}".format(bucket, "specified_ouput_path")
    problem_type = "MulticlassClassification"
    job_objective = {"MetricName": "Accuracy"}

    estimator = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
        output_path=output_path,
        problem_type=problem_type,
        job_objective=job_objective,
        generate_candidate_definitions_only=True,
    )

    # The fit call is bounded by the shared AutoML timeout.
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(TRAINING_DATA, job_name=unique_name_from_base(BASE_JOB_NAME))

    description = estimator.describe_auto_ml_job(
        job_name=estimator.latest_auto_ml_job.job_name
    )
    assert description["AutoMLJobStatus"] == "Completed"
    assert description["AutoMLJobName"] == estimator.latest_auto_ml_job.job_name
    assert description["AutoMLJobObjective"] == job_objective
    assert description["ProblemType"] == problem_type
    assert description["OutputDataConfig"]["S3OutputPath"] == output_path
def test_auto_ml_default_fit(strftime, sagemaker_session):
    """Check the keyword arguments a default ``fit`` forwards to the session.

    An ``AutoML`` object built with only role, target attribute and session is
    fitted on ``DEFAULT_S3_INPUT_DATA``; ``sagemaker_session.auto_ml`` must be
    called exactly once with the default configuration asserted below.

    ``strftime`` is unused in the body -- presumably injected by a patch
    decorator that is not visible here (TODO confirm).
    """
    estimator = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    estimator.fit(DEFAULT_S3_INPUT_DATA)

    sagemaker_session.auto_ml.assert_called_once()
    _, call_kwargs = sagemaker_session.auto_ml.call_args

    expected_input_config = [
        {
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    expected_job_config = {
        "CompletionCriteria": {"MaxCandidates": DEFAULT_MAX_CANDIDATES},
        "SecurityConfig": {
            "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC
        },
    }
    expected_kwargs = {
        "input_config": expected_input_config,
        "output_config": {"S3OutputPath": DEFAULT_OUTPUT_PATH},
        "auto_ml_job_config": expected_job_config,
        "role": ROLE,
        "job_name": DEFAULT_JOB_NAME,
        "problem_type": None,
        "job_objective": None,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": None,
    }
    assert call_kwargs == expected_kwargs
def test_auto_ml_fit_set_logs_to_false(start_new, sagemaker_session, caplog):
    """Check that ``fit(wait=False, logs=True)`` does not wait and logs a notice.

    After the call, ``start_new.wait`` must not have been invoked and the
    captured log text must contain the "Setting logs to False" message.
    """
    estimator = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    estimator.fit(DEFAULT_S3_INPUT_DATA, job_name=JOB_NAME, wait=False, logs=True)

    start_new.wait.assert_not_called()
    expected_notice = "Setting logs to False. logs is only meaningful when wait is True."
    assert expected_notice in caplog.text
def test_auto_ml_local_input(sagemaker_session):
    """Check the S3 URI that ``fit`` places in the first input config entry.

    ``fit`` is called with ``DEFAULT_S3_INPUT_DATA``; the session's
    ``auto_ml`` must be called once, and the ``S3Uri`` of the first
    ``input_config`` element must equal that same value.
    """
    estimator = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    estimator.fit(DEFAULT_S3_INPUT_DATA)

    sagemaker_session.auto_ml.assert_called_once()
    _, call_kwargs = sagemaker_session.auto_ml.call_args
    s3_data_source = call_kwargs["input_config"][0]["DataSource"]["S3DataSource"]
    assert s3_data_source["S3Uri"] == DEFAULT_S3_INPUT_DATA
def test_auto_ml_additional_optional_params(sagemaker_session):
    """Check that every optional constructor argument reaches the session call.

    The ``AutoML`` object is built with KMS keys, VPC config, compression,
    output path, completion limits, objective, problem type and tags. After
    ``fit``, ``sagemaker_session.auto_ml`` must have been called once with the
    keyword arguments asserted below.
    """
    estimator = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        volume_kms_key=VOLUME_KMS_KEY,
        vpc_config=VPC_CONFIG,
        encrypt_inter_container_traffic=ENCRYPT_INTER_CONTAINER_TRAFFIC,
        compression_type=COMPRESSION_TYPE,
        output_kms_key=OUTPUT_KMS_KEY,
        output_path=OUTPUT_PATH,
        problem_type=PROBLEM_TYPE,
        max_candidates=MAX_CANDIDATES,
        max_runtime_per_training_job_in_seconds=MAX_RUNTIME_PER_TRAINING_JOB,
        total_job_runtime_in_seconds=TOTAL_JOB_RUNTIME,
        job_objective=JOB_OBJECTIVE,
        generate_candidate_definitions_only=GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        tags=TAGS,
    )
    estimator.fit(DEFAULT_S3_INPUT_DATA, job_name=JOB_NAME)

    sagemaker_session.auto_ml.assert_called_once()
    _, call_kwargs = sagemaker_session.auto_ml.call_args

    expected_input_config = [
        {
            "CompressionType": COMPRESSION_TYPE,
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    expected_completion_criteria = {
        "MaxAutoMLJobRuntimeInSeconds": TOTAL_JOB_RUNTIME,
        "MaxCandidates": MAX_CANDIDATES,
        "MaxRuntimePerTrainingJobInSeconds": MAX_RUNTIME_PER_TRAINING_JOB,
    }
    expected_security_config = {
        "VolumeKmsKeyId": VOLUME_KMS_KEY,
        "VpcConfig": VPC_CONFIG,
        "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC,
    }
    expected_kwargs = {
        "input_config": expected_input_config,
        "output_config": {"S3OutputPath": OUTPUT_PATH, "KmsKeyId": OUTPUT_KMS_KEY},
        "auto_ml_job_config": {
            "CompletionCriteria": expected_completion_criteria,
            "SecurityConfig": expected_security_config,
        },
        "job_name": JOB_NAME,
        "role": ROLE,
        "job_objective": JOB_OBJECTIVE,
        "problem_type": PROBLEM_TYPE,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": TAGS,
    }
    assert call_kwargs == expected_kwargs
Ejemplo n.º 6
0
def test_auto_ml_fit_local_input(sagemaker_session):
    """Fit an AutoML job directly on ``TRAINING_DATA`` with one candidate.

    There are no explicit assertions: the test passes when ``fit`` returns
    without raising inside the ``timeout`` context.
    """
    automl_kwargs = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 1,
    }
    estimator = AutoML(**automl_kwargs)

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(TRAINING_DATA)
Ejemplo n.º 7
0
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    """Expect ``fit`` to fail when the target attribute name is ``"y"``.

    The training data is uploaded through the session first; ``fit`` on the
    uploaded location must raise ``UnexpectedStatusException`` whose message
    matches the data-builder failure text.
    """
    estimator = AutoML(
        role=ROLE, target_attribute_name="y", sagemaker_session=sagemaker_session, max_candidates=1
    )
    uploaded_inputs = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )

    expected_message = "Could not complete the data builder processing job."
    with pytest.raises(UnexpectedStatusException, match=expected_message):
        estimator.fit(uploaded_inputs)
def test_auto_ml_fit_local_input(sagemaker_session):
    """Fit an AutoML job on ``TRAINING_DATA`` under a freshly generated job name.

    The job name comes from ``unique_name_from_base("auto-ml", max_length=32)``.
    There are no explicit assertions: the test passes when ``fit`` returns
    without raising inside the ``timeout`` context.
    """
    automl_kwargs = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 1,
    }
    estimator = AutoML(**automl_kwargs)

    generated_name = unique_name_from_base("auto-ml", max_length=32)
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(TRAINING_DATA, job_name=generated_name)
Ejemplo n.º 9
0
def test_auto_ml_input_object_fit(sagemaker_session):
    """Fit an AutoML job from an ``AutoMLInput`` wrapping uploaded data.

    ``TRAINING_DATA`` is uploaded through the session, wrapped in an
    ``AutoMLInput`` with the target attribute name, and passed to ``fit``
    under a freshly generated job name. There are no explicit assertions: the
    test passes when ``fit`` returns without raising.
    """
    estimator = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    generated_name = unique_name_from_base("auto-ml", max_length=32)

    uploaded_uri = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )
    wrapped_input = AutoMLInput(
        inputs=uploaded_uri, target_attribute_name=TARGET_ATTRIBUTE_NAME
    )

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(wrapped_input, job_name=generated_name)
Ejemplo n.º 10
0
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    """Expect ``fit`` to raise ``ClientError`` for target attribute ``"y"``.

    The training data is uploaded through the session, then ``fit`` is called
    under a freshly generated job name. The raised error message must match
    the ``ValidationException`` text for a target attribute missing from the
    header.
    """
    automl_kwargs = {
        "role": ROLE,
        "target_attribute_name": "y",
        "sagemaker_session": sagemaker_session,
        "max_candidates": 1,
    }
    estimator = AutoML(**automl_kwargs)
    generated_name = unique_name_from_base("auto-ml", max_length=32)
    uploaded_inputs = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )

    # Regex pattern: the parentheses around the error code are escaped.
    expected_pattern = (
        r"An error occurred \(ValidationException\) when calling the CreateAutoMLJob "
        "operation: Target attribute name y does not exist in header."
    )
    with pytest.raises(ClientError, match=expected_pattern):
        estimator.fit(uploaded_inputs, job_name=generated_name)
Ejemplo n.º 11
0
def test_auto_ml_fit(sagemaker_session):
    """Fit an AutoML job with three candidates on uploaded training data.

    ``TRAINING_DATA`` is uploaded through the session under
    ``PREFIX + "/input"`` and the returned location is passed to ``fit``.
    There are no explicit assertions: the test passes when ``fit`` returns
    without raising inside the ``timeout`` context.
    """
    automl_kwargs = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 3,
    }
    estimator = AutoML(**automl_kwargs)

    uploaded_inputs = sagemaker_session.upload_data(
        path=TRAINING_DATA, key_prefix=PREFIX + "/input"
    )
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(uploaded_inputs)
Ejemplo n.º 12
0
def create_auto_ml_job_if_not_exist(sagemaker_session):
    """Ensure the shared base AutoML job exists, creating it when lookup fails.

    The job named ``python-sdk-integ-test-base-job`` is described through
    ``sagemaker_session``. If that call raises, ``TRAINING_DATA`` is uploaded
    under ``PREFIX + "/input"`` and a new ``AutoML`` job with that name is
    fitted with ``wait=True`` inside the ``timeout`` context.

    Args:
        sagemaker_session: Session used to describe the job, upload the
            training data and run the fit.
    """
    auto_ml_job_name = "python-sdk-integ-test-base-job"

    try:
        sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
    except Exception:
        # Any failure to describe the job is treated as "the job is missing".
        # NOTE(review): this also catches unrelated failures (for example
        # credential or network errors); narrow the exception type once the
        # error raised for a missing job is confirmed.
        auto_ml = AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            max_candidates=3,
        )
        inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
        with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
            auto_ml.fit(inputs, job_name=auto_ml_job_name, wait=True)
def test_auto_ml_input(sagemaker_session):
    """Check the input config produced when fitting with an ``AutoMLInput``.

    The input object carries ``DEFAULT_S3_INPUT_DATA``, target attribute
    ``"target"`` and ``Gzip`` compression. After ``fit``, the
    ``input_config`` keyword passed to ``sagemaker_session.auto_ml`` must
    equal the single-entry list asserted below.
    """
    wrapped_input = AutoMLInput(
        inputs=DEFAULT_S3_INPUT_DATA, target_attribute_name="target", compression="Gzip"
    )
    estimator = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    estimator.fit(wrapped_input)

    _, call_kwargs = sagemaker_session.auto_ml.call_args
    expected_entry = {
        "CompressionType": "Gzip",
        "DataSource": {
            "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
        },
        "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
    }
    assert call_kwargs["input_config"] == [expected_entry]
Ejemplo n.º 14
0
def test_auto_ml_fit_optional_args(sagemaker_session):
    """Fit an AutoML job with optional arguments and verify its description.

    The job is configured with an explicit output path (built from
    ``DEV_ACCOUNT``), a problem type and a job objective, and is fitted under
    ``JOB_NAME``. The description of that job must report ``Completed`` and
    echo back the configured name, objective, problem type and output path.
    """
    output_path = "s3://sagemaker-us-east-2-{}/{}".format(DEV_ACCOUNT, "specified_ouput_path")
    problem_type = "MulticlassClassification"
    job_objective = {"MetricName": "Accuracy"}

    automl_kwargs = {
        "role": ROLE,
        "target_attribute_name": TARGET_ATTRIBUTE_NAME,
        "sagemaker_session": sagemaker_session,
        "max_candidates": 1,
        "output_path": output_path,
        "problem_type": problem_type,
        "job_objective": job_objective,
    }
    estimator = AutoML(**automl_kwargs)

    # The fit call is bounded by the shared AutoML timeout.
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        estimator.fit(TRAINING_DATA, job_name=JOB_NAME)

    description = estimator.describe_auto_ml_job(job_name=JOB_NAME)
    assert description["AutoMLJobStatus"] == "Completed"
    assert description["AutoMLJobName"] == JOB_NAME
    assert description["AutoMLJobObjective"] == job_objective
    assert description["ProblemType"] == problem_type
    assert description["OutputDataConfig"]["S3OutputPath"] == output_path