Example #1
def test_auto_ml_input_object_fit(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
    )
    job_name = unique_name_from_base("auto-ml", max_length=32)
    s3_input = sagemaker_session.upload_data(path=TRAINING_DATA,
                                             key_prefix=PREFIX + "/input")
    inputs = AutoMLInput(inputs=s3_input,
                         target_attribute_name=TARGET_ATTRIBUTE_NAME)
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.fit(inputs, job_name=job_name)
Example #2
def test_auto_ml_invalid_target_attribute(sagemaker_session):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name="y",
                     sagemaker_session=sagemaker_session,
                     max_candidates=1)
    job_name = unique_name_from_base("auto-ml", max_length=32)
    inputs = sagemaker_session.upload_data(path=TRAINING_DATA,
                                           key_prefix=PREFIX + "/input")
    with pytest.raises(
        ClientError,
        match=r"An error occurred \(ValidationException\) when calling the CreateAutoMLJob "
        "operation: Target attribute name y does not exist in header.",
    ):
        auto_ml.fit(inputs, job_name=job_name)
Example #3
def test_validate_and_update_inference_response_wrong_input():
    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)

    with pytest.raises(ValueError) as excinfo:
        AutoML.validate_and_update_inference_response(
            inference_containers=cic,
            inference_response_keys=[
                "wrong_key", "wrong_label", "probabilities", "probability"
            ],
        )
    message = (
        "Requested inference output keys [wrong_key, wrong_label] are unsupported. "
        "The supported inference keys are [probability, probabilities, predicted_label, labels]"
    )
    assert message in str(excinfo.value)
def create_auto_ml_job_if_not_exist(sagemaker_session):
    auto_ml_job_name = "python-sdk-integ-test-base-job"

    try:
        sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
    except Exception as e:  # noqa: F841
        auto_ml = AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            max_candidates=3,
        )
        inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
        with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
            auto_ml.fit(inputs, job_name=auto_ml_job_name, wait=True)
Example #5
def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
    best_candidate = auto_ml.best_candidate()

    with timeout(minutes=5):
        pipeline_model = auto_ml.create_model(
            name=DEFAULT_MODEL_NAME,
            candidate=best_candidate,
            sagemaker_session=sagemaker_session,
            vpc_config=None,
            enable_network_isolation=False,
            model_kms_key=None,
            predictor_cls=None,
        )
    inputs = sagemaker_session.upload_data(
        path=TRANSFORM_DATA, key_prefix=PREFIX + "/transform_input"
    )
    pipeline_model.transformer(
        instance_count=1,
        instance_type=cpu_instance_type,
        assemble_with="Line",
        output_path="s3://{}/{}".format(sagemaker_session.default_bucket(), "transform_test"),
        accept="text/csv",
    ).transform(data=inputs, content_type="text/csv", split_type="Line", join_source="Input")
Example #6
def test_auto_ml_attach(sagemaker_session):
    expected_default_input_config = [
        {
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://{}/{}/input/iris_training.csv".format(
                        sagemaker_session.default_bucket(), PREFIX
                    ),
                }
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    expected_default_output_config = {
        "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
    }

    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    attached_automl_job = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME,
                                        sagemaker_session=sagemaker_session)
    attached_desc = attached_automl_job.describe_auto_ml_job()
    assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
    assert attached_desc["AutoMLJobStatus"] == "Completed"
    assert isinstance(attached_desc["BestCandidate"], dict)
    assert attached_desc["InputDataConfig"] == expected_default_input_config
    assert attached_desc["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
    assert attached_desc["OutputDataConfig"] == expected_default_output_config
Example #7
def test_validate_and_update_inference_response():
    cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS)

    AutoML.validate_and_update_inference_response(
        inference_containers=cic,
        inference_response_keys=[
            "predicted_label", "labels", "probabilities", "probability"
        ],
    )

    assert (cic[2]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"] ==
            "predicted_label,labels,probabilities,probability")
    assert (cic[2]["Environment"]["SAGEMAKER_INFERENCE_INPUT"] ==
            "predicted_label,probabilities,probability")
    assert (cic[1]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"] ==
            "predicted_label,probabilities,probability")
def test_create_model(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )

    pipeline_model = auto_ml.create_model(
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        candidate=CLASSIFICATION_CANDIDATE_DICT,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=None,
        predictor_cls=None,
        inference_response_keys=None,
    )

    assert isinstance(pipeline_model, PipelineModel)
def test_attach(sagemaker_session):
    aml = AutoML.attach(auto_ml_job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
    assert aml.current_job_name == JOB_NAME_3
    assert aml.role == "mock_role_arn"
    assert aml.target_attribute_name == "y"
    assert aml.problem_type == "Auto"
    assert aml.output_path == "s3://output_prefix"
    assert aml.tags == LIST_TAGS_RESULT["Tags"]
def test_auto_ml_input(sagemaker_session):
    inputs = AutoMLInput(
        inputs=DEFAULT_S3_INPUT_DATA, target_attribute_name="target", compression="Gzip"
    )
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    auto_ml.fit(inputs)
    _, args = sagemaker_session.auto_ml.call_args
    assert args["input_config"] == [
        {
            "CompressionType": "Gzip",
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
def test_deploy_optional_args(candidate_estimator, sagemaker_session,
                              candidate_mock):
    candidate_estimator.return_value = candidate_mock

    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline",
                                              return_value=None)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        candidate=CANDIDATE_DICT,
        sagemaker_session=sagemaker_session,
        name=JOB_NAME,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        update_endpoint=True,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=RealTimePredictor,
    )
    auto_ml._deploy_inference_pipeline.assert_called_once()
    auto_ml._deploy_inference_pipeline.assert_called_with(
        candidate_mock.containers,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        update_endpoint=True,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=RealTimePredictor,
    )

    candidate_estimator.assert_called_with(CANDIDATE_DICT,
                                           sagemaker_session=sagemaker_session)
def test_auto_ml_default_fit(strftime, sagemaker_session):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    inputs = DEFAULT_S3_INPUT_DATA
    auto_ml.fit(inputs)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args
    assert args == {
        "input_config": [
            {
                "DataSource": {
                    "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
                },
                "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
            }
        ],
        "output_config": {"S3OutputPath": DEFAULT_OUTPUT_PATH},
        "auto_ml_job_config": {
            "CompletionCriteria": {"MaxCandidates": DEFAULT_MAX_CANDIDATES},
            "SecurityConfig": {
                "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC
            },
        },
        "role": ROLE,
        "job_name": DEFAULT_JOB_NAME,
        "problem_type": None,
        "job_objective": None,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": None,
    }
def test_auto_ml_invalid_input_data_format(sagemaker_session):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    inputs = {}

    expected_error_msg = "Cannot format input {}. Expecting one of str or list of strings."
    with pytest.raises(ValueError) as excinfo:
        AutoMLJob.start_new(auto_ml, inputs)
    assert expected_error_msg.format(inputs) in str(excinfo.value)
    sagemaker_session.auto_ml.assert_not_called()
Example #14
def get_candidates(top_n_candidates, job_name):
    """Attach to an existing Autopilot job and return its top n candidates."""
    est = AutoML.attach(auto_ml_job_name=job_name)

    candidates = est.list_candidates(
        sort_by="FinalObjectiveMetricValue",
        sort_order="Descending",
        max_results=top_n_candidates,
    )

    return est, candidates
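A minimal usage sketch, assuming "my-autopilot-job" is a placeholder for a completed Autopilot job and that each candidate dictionary carries the CandidateName and FinalAutoMLJobObjectiveMetric fields of the ListCandidatesForAutoMLJob response:
# Hypothetical job name; replace with a real, completed Autopilot job.
est, candidates = get_candidates(top_n_candidates=3, job_name="my-autopilot-job")

for candidate in candidates:
    # Print each candidate's name and its final objective metric, if present.
    metric = candidate.get("FinalAutoMLJobObjectiveMetric", {})
    print(candidate["CandidateName"], metric.get("MetricName"), metric.get("Value"))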
Example #15
def test_deploy_best_candidate(sagemaker_session):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
    endpoint_name = unique_name_from_base(
        "sagemaker-auto-ml-best-candidate-test")

    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.deploy(
            candidate=best_candidate,
            initial_instance_count=INSTANCE_COUNT,
            instance_type=HOSTING_INSTANCE_TYPE,
            endpoint_name=endpoint_name,
        )

    endpoint_status = sagemaker_session.sagemaker_client.describe_endpoint(
        EndpointName=endpoint_name)["EndpointStatus"]
    assert endpoint_status == "InService"
    sagemaker_session.sagemaker_client.delete_endpoint(
        EndpointName=endpoint_name)
def test_deploy(sagemaker_session, candidate_mock):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    auto_ml.best_candidate = Mock(name="best_candidate",
                                  return_value=CANDIDATE_DICT)
    auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline",
                                              return_value=None)
    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    auto_ml._deploy_inference_pipeline.assert_called_once()
    auto_ml._deploy_inference_pipeline.assert_called_with(
        candidate_mock.containers,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        name=None,
        sagemaker_session=sagemaker_session,
        endpoint_name=None,
        tags=None,
        wait=True,
        update_endpoint=False,
        vpc_config=None,
        enable_network_isolation=False,
        model_kms_key=None,
        predictor_cls=None,
    )
def test_auto_ml_only_one_of_problem_type_and_job_objective_provided(sagemaker_session):
    with pytest.raises(
        ValueError,
            match="One of problem type and objective metric provided. "
        "Either both of them should be provided or none of "
        "them should be provided.",
    ):
        AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            problem_type=PROBLEM_TYPE,
        )
Example #18
def test_auto_ml_fit_optional_args(sagemaker_session):
    output_path = "s3://{}/{}".format(sagemaker_session.default_bucket(), "specified_output_path")
    problem_type = "MulticlassClassification"
    job_objective = {"MetricName": "Accuracy"}
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        max_candidates=1,
        output_path=output_path,
        problem_type=problem_type,
        job_objective=job_objective,
    )
    inputs = TRAINING_DATA
    with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
        auto_ml.fit(inputs, job_name=unique_name_from_base(BASE_JOB_NAME))

    auto_ml_desc = auto_ml.describe_auto_ml_job(job_name=auto_ml.latest_auto_ml_job.job_name)
    assert auto_ml_desc["AutoMLJobStatus"] == "Completed"
    assert auto_ml_desc["AutoMLJobName"] == auto_ml.latest_auto_ml_job.job_name
    assert auto_ml_desc["AutoMLJobObjective"] == job_objective
    assert auto_ml_desc["ProblemType"] == problem_type
    assert auto_ml_desc["OutputDataConfig"]["S3OutputPath"] == output_path
def test_auto_ml_only_one_of_problem_type_and_job_objective_provided(sagemaker_session):
    with pytest.raises(ValueError) as excinfo:
        AutoML(
            role=ROLE,
            target_attribute_name=TARGET_ATTRIBUTE_NAME,
            sagemaker_session=sagemaker_session,
            problem_type=PROBLEM_TYPE,
        )

    message = (
        "One of problem type and objective metric provided. Either both of them "
        "should be provided or none of them should be provided."
    )
    assert message in str(excinfo.value)
def test_list_candidates_with_optional_args(sagemaker_session):
    auto_ml = AutoML(role=ROLE,
                     target_attribute_name=TARGET_ATTRIBUTE_NAME,
                     sagemaker_session=sagemaker_session)
    auto_ml.list_candidates(
        job_name=JOB_NAME,
        status_equals="Completed",
        candidate_name="candidate-name",
        candidate_arn="candidate-arn",
        sort_order="Ascending",
        sort_by="Status",
        max_results=99,
    )
    sagemaker_session.list_candidates.assert_called_once()
    _, args = sagemaker_session.list_candidates.call_args
    assert args == {
        "job_name": JOB_NAME,
        "status_equals": "Completed",
        "candidate_name": "candidate-name",
        "candidate_arn": "candidate-arn",
        "sort_order": "Ascending",
        "sort_by": "Status",
        "max_results": 99,
    }
def test_auto_ml_default_channel_name(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    inputs = DEFAULT_S3_INPUT_DATA
    AutoMLJob.start_new(auto_ml, inputs)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args
    assert args["input_config"] == [
        {
            "DataSource": {
                "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_mock):
    candidate_estimator.return_value = candidate_mock

    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    mock_pipeline = Mock(name="pipeline_model")
    mock_pipeline.deploy = Mock(name="model_deploy")
    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)

    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        candidate=CANDIDATE_DICT,
        sagemaker_session=sagemaker_session,
        name=JOB_NAME,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=Predictor,
        inference_response_keys=None,
    )

    auto_ml.create_model.assert_called_once()
    auto_ml.create_model.assert_called_with(
        name=JOB_NAME,
        sagemaker_session=sagemaker_session,
        candidate=CANDIDATE_DICT,
        inference_response_keys=None,
        vpc_config=VPC_CONFIG,
        enable_network_isolation=True,
        model_kms_key=OUTPUT_KMS_KEY,
        predictor_cls=Predictor,
    )

    mock_pipeline.deploy.assert_called_once()

    mock_pipeline.deploy.assert_called_with(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        serializer=None,
        deserializer=None,
        endpoint_name=JOB_NAME,
        tags=TAGS,
        wait=False,
    )
def test_deploy(sagemaker_session, candidate_mock):
    auto_ml = AutoML(
        role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
    )
    mock_pipeline = Mock(name="pipeline_model")
    mock_pipeline.deploy = Mock(name="model_deploy")
    auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT)
    auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline)
    auto_ml.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    auto_ml.create_model.assert_called_once()
    mock_pipeline.deploy.assert_called_once()
def lambda_handler(event, context):
    try:
        model_config = event["Input"]["taskresult"]["Payload"]["model-config"]
        automl_config = event["Input"]["taskresult"]["Payload"]["automl-config"]
        security_config = event["Input"]["taskresult"]["Payload"]["security-config"]

        session = Session()
        automl_job = AutoML.attach(automl_config["job_name"], sagemaker_session=session)

        model = automl_job.create_model(
            model_config["model_name"],
            inference_response_keys=model_config["inference_response_keys"],
        )

        # Ask the Autopilot data-transformation container to emit sparse
        # RecordIO-protobuf output.
        model.models[0].env["AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF"] = "1"

        session.create_model(
            name=model_config["model_name"],
            role=security_config["iam_role"],
            container_defs=model.pipeline_container_def(model_config["instance_type"]),
        )

    except KeyError as e:
        raise KeyError(f"KeyError on input: {event}") from e

    return event["Input"]["taskresult"]["Payload"]
def test_auto_ml_additional_optional_params(sagemaker_session):
    auto_ml = AutoML(
        role=ROLE,
        target_attribute_name=TARGET_ATTRIBUTE_NAME,
        sagemaker_session=sagemaker_session,
        volume_kms_key=VOLUME_KMS_KEY,
        vpc_config=VPC_CONFIG,
        encrypt_inter_container_traffic=ENCRYPT_INTER_CONTAINER_TRAFFIC,
        compression_type=COMPRESSION_TYPE,
        output_kms_key=OUTPUT_KMS_KEY,
        output_path=OUTPUT_PATH,
        problem_type=PROBLEM_TYPE,
        max_candidates=MAX_CANDIDATES,
        max_runtime_per_training_job_in_seconds=MAX_RUNTIME_PER_TRAINING_JOB,
        total_job_runtime_in_seconds=TOTAL_JOB_RUNTIME,
        job_objective=JOB_OBJECTIVE,
        generate_candidate_definitions_only=GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        tags=TAGS,
    )
    inputs = DEFAULT_S3_INPUT_DATA
    auto_ml.fit(inputs, job_name=JOB_NAME)
    sagemaker_session.auto_ml.assert_called_once()
    _, args = sagemaker_session.auto_ml.call_args

    assert args == {
        "input_config": [
            {
                "CompressionType": COMPRESSION_TYPE,
                "DataSource": {
                    "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": DEFAULT_S3_INPUT_DATA}
                },
                "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
            }
        ],
        "output_config": {"S3OutputPath": OUTPUT_PATH, "KmsKeyId": OUTPUT_KMS_KEY},
        "auto_ml_job_config": {
            "CompletionCriteria": {
                "MaxAutoMLJobRuntimeInSeconds": TOTAL_JOB_RUNTIME,
                "MaxCandidates": MAX_CANDIDATES,
                "MaxRuntimePerTrainingJobInSeconds": MAX_RUNTIME_PER_TRAINING_JOB,
            },
            "SecurityConfig": {
                "VolumeKmsKeyId": VOLUME_KMS_KEY,
                "VpcConfig": VPC_CONFIG,
                "EnableInterContainerTrafficEncryption": ENCRYPT_INTER_CONTAINER_TRAFFIC,
            },
        },
        "job_name": JOB_NAME,
        "role": ROLE,
        "job_objective": JOB_OBJECTIVE,
        "problem_type": PROBLEM_TYPE,
        "generate_candidate_definitions_only": GENERATE_CANDIDATE_DEFINITIONS_ONLY,
        "tags": TAGS,
    }