Example #1
def test_auto_ml_attach(sagemaker_session):
    expected_default_input_config = [
        {
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://{}/{}/input/iris_training.csv".format(
                        sagemaker_session.default_bucket(), PREFIX
                    ),
                }
            },
            "TargetAttributeName": TARGET_ATTRIBUTE_NAME,
        }
    ]
    expected_default_output_config = {
        "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
    }

    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    attached_automl_job = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME,
                                        sagemaker_session=sagemaker_session)
    attached_desc = attached_automl_job.describe_auto_ml_job()
    assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
    assert attached_desc["AutoMLJobStatus"] == "Completed"
    assert isinstance(attached_desc["BestCandidate"], dict)
    assert attached_desc["InputDataConfig"] == expected_default_input_config
    assert attached_desc["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG
    assert attached_desc["OutputDataConfig"] == expected_default_output_config
Example #2
def test_create_model_best_candidate(sagemaker_session, cpu_instance_type):
    auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)

    auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
    best_candidate = auto_ml.best_candidate()

    with timeout(minutes=5):
        pipeline_model = auto_ml.create_model(
            name=DEFAULT_MODEL_NAME,
            candidate=best_candidate,
            sagemaker_session=sagemaker_session,
            vpc_config=None,
            enable_network_isolation=False,
            model_kms_key=None,
            predictor_cls=None,
        )
    inputs = sagemaker_session.upload_data(
        path=TRANSFORM_DATA, key_prefix=PREFIX + "/transform_input"
    )
    pipeline_model.transformer(
        instance_count=1,
        instance_type=cpu_instance_type,
        assemble_with="Line",
        output_path="s3://{}/{}".format(sagemaker_session.default_bucket(), "transform_test"),
        accept="text/csv",
    ).transform(data=inputs, content_type="text/csv", split_type="Line", join_source="Input")
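For real-time inference rather than batch transform, the attached job can also deploy its best candidate as an endpoint. A minimal sketch, assuming the same completed job as above; the endpoint name is a hypothetical placeholder:

from sagemaker.automl.automl import AutoML

auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session)
# deploy() hosts the best candidate behind a real-time endpoint
predictor = auto_ml.deploy(
    initial_instance_count=1,
    instance_type=cpu_instance_type,
    endpoint_name="automl-best-candidate",  # hypothetical endpoint name
)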
Example #3
def test_attach(sagemaker_session):
    aml = AutoML.attach(auto_ml_job_name=JOB_NAME_3, sagemaker_session=sagemaker_session)
    assert aml.current_job_name == JOB_NAME_3
    assert aml.role == "mock_role_arn"
    assert aml.target_attribute_name == "y"
    assert aml.problem_type == "Auto"
    assert aml.output_path == "s3://output_prefix"
    assert aml.tags == LIST_TAGS_RESULT["Tags"]
Example #4
def get_candidates(top_n_candidates, job_name):
    # Attach to an existing Autopilot job and return its top-scoring candidates.
    est = AutoML.attach(auto_ml_job_name=job_name)

    candidates = est.list_candidates(
        sort_by="FinalObjectiveMetricValue",
        sort_order="Descending",
        max_results=top_n_candidates,
    )

    return est, candidates
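A usage sketch for the helper above; the job name and candidate count are hypothetical placeholders, and each returned candidate dict carries its name and final objective metric:

est, candidates = get_candidates(top_n_candidates=3, job_name="my-autopilot-job")  # hypothetical job
for candidate in candidates:
    print(candidate["CandidateName"], candidate["FinalAutoMLJobObjectiveMetric"]["Value"])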
Example #5
def lambda_handler(event, context):
    
    try:
        model_config = event["Input"]["taskresult"]["Payload"]["model-config"]
        automl_config = event["Input"]["taskresult"]["Payload"]["automl-config"]
        security_config = event["Input"]["taskresult"]["Payload"]["security-config"]

        session = Session()
        automl_job = AutoML.attach(automl_config["job_name"], sagemaker_session=session)

        model = automl_job.create_model(
            model_config["model_name"],
            inference_response_keys=model_config["inference_response_keys"],
        )

        # Force sparse RecordIO-protobuf encoding on the first container in the pipeline.
        model.models[0].env["AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF"] = "1"

        session.create_model(
            name=model_config["model_name"],
            role=security_config["iam_role"],
            container_defs=model.pipeline_container_def(model_config["instance_type"]),
        )

    except KeyError as e:
        raise KeyError(f"KeyError on input: {event}") from e

    return event["Input"]["taskresult"]["Payload"]
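The handler expects a Step Functions payload shaped like the sketch below; only the key structure is taken from the code above, and every value is a hypothetical placeholder:

event = {
    "Input": {
        "taskresult": {
            "Payload": {
                "model-config": {
                    "model_name": "my-automl-model",  # hypothetical
                    "inference_response_keys": ["predicted_label", "probability"],
                    "instance_type": "ml.m5.xlarge",
                },
                "automl-config": {"job_name": "my-autopilot-job"},
                "security-config": {"iam_role": "arn:aws:iam::123456789012:role/MySageMakerRole"},
            }
        }
    }
}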