Example #1
0
def test_auto_ml_attach(sagemaker_session):
    """Attach to the job named ``AUTO_ML_JOB_NAME`` and check its description.

    The description returned by ``describe_auto_ml_job`` is compared against
    the expected job name, status, input/output data configuration and job
    configuration; the best candidate must be a ``dict``.
    """
    training_uri = "s3://{}/{}/input/iris_training.csv".format(
        sagemaker_session.default_bucket(), PREFIX
    )
    s3_data_source = {"S3DataType": "S3Prefix", "S3Uri": training_uri}
    input_config = [
        {
            "DataSource": {"S3DataSource": s3_data_source},
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    output_path = "s3://{}/".format(sagemaker_session.default_bucket())
    output_config = {"S3OutputPath": output_path}

    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    job = AutoML.attach(
        auto_ml_job_name=AUTO_ML_JOB_NAME,
        sagemaker_session=sagemaker_session,
    )
    description = job.describe_auto_ml_job()

    assert description["AutoMLJobName"] == AUTO_ML_JOB_NAME
    assert description["AutoMLJobStatus"] == "Completed"
    assert isinstance(description["BestCandidate"], dict)
    assert description["InputDataConfig"] == input_config
    assert description["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
    assert description["OutputDataConfig"] == output_config
Example #2
0
def test_candidate_estimator_default_rerun_and_deploy(sagemaker_session,
                                                      cpu_instance_type):
    """Re-fit a listed candidate, deploy it and check the endpoint status.

    The candidate at index 1 of the listing for ``AUTO_ML_JOB_NAME`` is
    wrapped in a ``CandidateEstimator`` and fitted on freshly uploaded test
    data, then deployed through ``AutoML.deploy``. The endpoint must report
    ``InService`` and is deleted afterwards regardless of the check outcome.

    Args:
        sagemaker_session: Session used for uploads and SageMaker API calls.
        cpu_instance_type: Instance type used for the deployed endpoint.
    """
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)

    candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
    candidate = candidates[1]

    candidate_estimator = CandidateEstimator(candidate, sagemaker_session)
    inputs = sagemaker_session.upload_data(path=TEST_DATA,
                                           key_prefix=PREFIX + "/input")
    endpoint_name = unique_name_from_base(
        "sagemaker-auto-ml-rerun-candidate-test")
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        candidate_estimator.fit(inputs)
        auto_ml.deploy(
            initial_instance_count=INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            candidate=candidate,
            endpoint_name=endpoint_name,
        )

    sagemaker_client = sagemaker_session.sagemaker_client
    try:
        endpoint_status = sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name)["EndpointStatus"]
        assert endpoint_status == "InService"
    finally:
        # Clean up even when the status check fails, so a failing run does
        # not leave the endpoint behind.
        sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
Example #3
0
def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
    """Create a model from the best candidate and run a transform job with it.

    The test has no explicit assertions; it passes when model creation and
    the transform call complete without raising.
    """
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    attached_job = AutoML.attach(
        auto_ml_job_name=AUTO_ML_JOB_NAME,
        sagemaker_session=sagemaker_session,
    )
    top_candidate = attached_job.best_candidate()

    with timeout(minutes=5):
        model = attached_job.create_model(
            name=DEFAULT_MODEL_NAME,
            candidate=top_candidate,
            sagemaker_session=sagemaker_session,
            vpc_config=None,
            enable_network_isolation=False,
            model_kms_key=None,
            predictor_cls=None,
        )

    transform_input = sagemaker_session.upload_data(
        path=TRANSFORM_DATA,
        key_prefix=PREFIX + "/transform_input",
    )
    output_location = "s3://{}/{}".format(
        sagemaker_session.default_bucket(), "transform_test"
    )
    transformer = model.transformer(
        instance_count=1,
        instance_type=cpu_instance_type,
        assemble_with="Line",
        output_path=output_location,
        accept="text/csv",
    )
    transformer.transform(
        data=transform_input,
        content_type="text/csv",
        split_type="Line",
        join_source="Input",
    )
Example #4
0
def test_auto_ml_describe_auto_ml_job(sagemaker_session):
    """Describe the job named ``AUTO_ML_JOB_NAME`` through a new ``AutoML``.

    The description is compared against the expected job name, status,
    input/output data configuration and job configuration; the best
    candidate must be a ``dict``.
    """
    training_uri = "s3://{}/{}/input/iris_training.csv".format(
        sagemaker_session.default_bucket(), PREFIX
    )
    s3_data_source = {"S3DataType": "S3Prefix", "S3Uri": training_uri}
    input_config = [
        {
            "DataSource": {"S3DataSource": s3_data_source},
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
            "ContentType": "text/csv;header=present",
            "ChannelType": "training",
        }
    ]
    output_path = "s3://{}/".format(sagemaker_session.default_bucket())
    output_config = {"S3OutputPath": output_path}

    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )

    description = automl.describe_auto_ml_job(job_name=AUTO_ML_JOB_NAME)

    assert description["AutoMLJobName"] == AUTO_ML_JOB_NAME
    assert description["AutoMLJobStatus"] == "Completed"
    assert isinstance(description["BestCandidate"], dict)
    assert description["InputDataConfig"] == input_config
    assert description["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
    assert description["OutputDataConfig"] == output_config
Example #5
0
def test_list_candidates(sagemaker_session):
    """Check that listing candidates for ``AUTO_ML_JOB_NAME`` yields three."""
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    listed = automl.list_candidates(job_name=AUTO_ML_JOB_NAME)

    assert len(listed) == 3
Example #6
0
def test_best_candidate(sagemaker_session):
    """Check the shape and status of the best candidate of the job.

    Expects three inference containers, four candidate steps and a
    ``Completed`` candidate status.
    """
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    winner = automl.best_candidate(job_name=AUTO_ML_JOB_NAME)

    assert len(winner["InferenceContainers"]) == 3
    assert len(winner["CandidateSteps"]) == 4
    assert winner["CandidateStatus"] == "Completed"
Example #7
0
def test_candidate_estimator_get_steps(sagemaker_session):
    """Check that the candidate at index 1 exposes three steps."""
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    automl = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
    )
    listed = automl.list_candidates(job_name=AUTO_ML_JOB_NAME)
    chosen = listed[1]

    estimator = CandidateEstimator(chosen, sagemaker_session)
    candidate_steps = estimator.get_steps()

    assert len(candidate_steps) == 3
Example #8
0
def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
    """Deploy the best candidate of the job and check the endpoint status.

    The endpoint must report ``InService`` after ``AutoML.deploy`` returns
    and is deleted afterwards regardless of the check outcome.

    Args:
        sagemaker_session: Session used for SageMaker API calls.
        cpu_instance_type: Instance type used for the deployed endpoint.
    """
    # Helper presumably creates the shared job when it is missing.
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
    endpoint_name = unique_name_from_base("sagemaker-auto-ml-best-candidate-test")

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.deploy(
            candidate=best_candidate,
            initial_instance_count=INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            endpoint_name=endpoint_name,
        )

    sagemaker_client = sagemaker_session.sagemaker_client
    try:
        endpoint_status = sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )["EndpointStatus"]
        assert endpoint_status == "InService"
    finally:
        # Clean up even when the status check fails, so a failing run does
        # not leave the endpoint behind.
        sagemaker_client.delete_endpoint(EndpointName=endpoint_name)