def test_shap_with_predicted_label(
    name_from_base,
    clarify_processor,
    clarify_processor_with_job_name_prefix,
    data_config,
    model_config,
    shap_config,
):
    probability = "pr"
    label_headers = ["success"]
    model_scores = ModelPredictedLabelConfig(
        probability=probability,
        label_headers=label_headers,
    )
    expected_predictor_config = {
        "model_name": "xgboost-model",
        "instance_type": "ml.c5.xlarge",
        "initial_instance_count": 1,
        "probability": probability,
        "label_headers": label_headers,
    }
    _run_test_shap(
        name_from_base,
        clarify_processor,
        clarify_processor_with_job_name_prefix,
        data_config,
        model_config,
        shap_config,
        model_scores,
        expected_predictor_config,
    )
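A standalone sketch (not part of the test above) of how the expected_predictor_config can be assembled directly from the two config objects; the model name and instance settings are just the illustrative values the test expects, and ModelConfig.get_predictor_config() is assumed here to return the model/instance portion of the config.

from sagemaker.clarify import ModelConfig, ModelPredictedLabelConfig

model_config = ModelConfig(
    model_name="xgboost-model",  # illustrative values matching the expectations above
    instance_type="ml.c5.xlarge",
    instance_count=1,
)
model_scores = ModelPredictedLabelConfig(probability="pr", label_headers=["success"])

# ModelPredictedLabelConfig.get_predictor_config() returns (probability_threshold, config);
# merging that config into the ModelConfig portion reproduces expected_predictor_config.
predictor_config = model_config.get_predictor_config()
_, label_config = model_scores.get_predictor_config()
predictor_config.update(label_config)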
Example #2
def model_predicted_label_config(sagemaker_session, model_name, training_set):
    predictor = LinearLearnerPredictor(
        model_name,
        sagemaker_session=sagemaker_session,
    )
    # Score the training features and pull the predicted "score" value out of each record.
    result = predictor.predict(training_set[0].astype(np.float32))
    predictions = [float(record.label["score"].float32_tensor.values[0]) for record in result]
    # Use the median score as the cut-off between negative and positive predicted labels.
    probability_threshold = statistics.median(predictions)
    return ModelPredictedLabelConfig(label="score", probability_threshold=probability_threshold)
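Purely illustrative and not taken from the fixture above: the same median rule applied to a small list of hypothetical scores, showing the split that probability_threshold is meant to control (scores above the threshold read as the positive predicted label).

import statistics

scores = [0.12, 0.48, 0.51, 0.87]              # hypothetical predicted scores
threshold = statistics.median(scores)          # same median rule as the fixture above
labels = [int(s > threshold) for s in scores]  # above the threshold -> positive class, i.e. [0, 0, 1, 1]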
Example #3
def test_invalid_model_predicted_label_config():
    with pytest.raises(TypeError) as error:
        ModelPredictedLabelConfig(
            probability_threshold="invalid",
        )
    assert (
        "Invalid probability_threshold invalid. Please choose one that can be cast to float."
        in str(error.value)
    )
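A hedged counterpart to the negative test above: going by the asserted error message, any value that can be cast to float should be accepted, so a numeric string presumably passes the same validation while a non-numeric one raises TypeError.

import pytest
from sagemaker.clarify import ModelPredictedLabelConfig

ModelPredictedLabelConfig(probability_threshold=0.3)    # plain float, accepted
ModelPredictedLabelConfig(probability_threshold="0.3")  # castable string, presumably accepted as well

with pytest.raises(TypeError):
    ModelPredictedLabelConfig(probability_threshold="not-a-number")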

def test_model_predicted_label_config():
    label = "label"
    probability = "pr"
    probability_threshold = 0.2
    label_headers = ["success"]
    model_config = ModelPredictedLabelConfig(
        label=label,
        probability=probability,
        probability_threshold=probability_threshold,
        label_headers=label_headers,
    )
    pr_threshold, config = model_config.get_predictor_config()
    expected_config = {
        "label": label,
        "probability": probability,
        "label_headers": label_headers,
    }
    assert probability_threshold == pr_threshold
    assert expected_config == config
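A complementary sketch, assuming that unset fields are simply omitted from the config dict (consistent with expected_config above, where probability_threshold is returned separately and never appears in the dict): a label-only config should yield no threshold and only the label key.

from sagemaker.clarify import ModelPredictedLabelConfig

label_only = ModelPredictedLabelConfig(label="predicted_label")
threshold, config = label_only.get_predictor_config()
assert threshold is None                       # no probability_threshold was provided
assert config == {"label": "predicted_label"}  # only the fields that were set appear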
Example #5
def test_processing_step_with_clarify_processor(pipeline_session):
    def headers():
        return [
            "Label",
            "F1",
            "F2",
            "F3",
            "F4",
        ]

    def data_bias_config():
        return BiasConfig(
            label_values_or_threshold=[1],
            facet_name="F1",
            facet_values_or_threshold=[0.5],
            group_name="F2",
        )

    def model_config(model_name):
        return ModelConfig(
            model_name=model_name,
            instance_type="ml.c5.xlarge",
            instance_count=1,
            accept_type="application/jsonlines",
            endpoint_name_prefix="myprefix",
        )

    def shap_config():
        return SHAPConfig(
            baseline=[
                [
                    0.94672389,
                    0.47108862,
                    0.63350081,
                    0.00604642,
                ]
            ],
            num_samples=2,
            agg_method="mean_sq",
            seed=123,
        )

    def verify(step_args):
        # Wrap the Clarify-generated arguments in a ProcessingStep and check that they
        # show up unchanged in the serialized pipeline definition.
        step = ProcessingStep(
            name="MyProcessingStep",
            step_args=step_args,
        )
        pipeline = Pipeline(
            name="MyPipeline",
            steps=[step],
            sagemaker_session=pipeline_session,
        )
        assert json.loads(pipeline.definition())["Steps"][0] == {
            "Name": "MyProcessingStep",
            "Type": "Processing",
            "Arguments": step_args,
        }

    test_run = utils.unique_name_from_base("test_run")
    output_path = "s3://{}/{}/{}".format(
        pipeline_session.default_bucket(), "linear_learner_analysis_result", test_run
    )
    data_config = DataConfig(
        s3_data_input_path=f"s3://{pipeline_session.default_bucket()}/{input}/train.csv",
        s3_output_path=output_path,
        label="Label",
        headers=headers(),
        dataset_type="text/csv",
    )

    clarify_processor = SageMakerClarifyProcessor(
        instance_type=INSTANCE_TYPE,
        instance_count=1,
        sagemaker_session=pipeline_session,
        role=sagemaker.get_execution_role(),
    )

    run_bias_args = clarify_processor.run_bias(
        data_config=data_config,
        bias_config=data_bias_config(),
        model_config=model_config("1st-model-rpyndy0uyo"),
    )
    verify(run_bias_args)

    run_pre_training_bias_args = clarify_processor.run_pre_training_bias(
        data_config=data_config,
        data_bias_config=data_bias_config(),
    )
    verify(run_pre_training_bias_args)

    run_post_training_bias_args = clarify_processor.run_post_training_bias(
        data_config=data_config,
        data_bias_config=data_bias_config(),
        model_config=model_config("1st-model-rpyndy0uyo"),
        model_predicted_label_config=ModelPredictedLabelConfig(probability_threshold=0.9),
    )
    verify(run_post_training_bias_args)

    run_explainability_args = clarify_processor.run_explainability(
        data_config=data_config,
        model_config=model_config("1st-model-rpyndy0uyo"),
        explainability_config=shap_config(),
    )
    verify(run_explainability_args)

def model_predicted_label_config():
    return ModelPredictedLabelConfig(probability_threshold=BIAS_PROBABILITY_THRESHOLD)

def model_predicted_label_config():
    return ModelPredictedLabelConfig()

def predictions_config():
    return ModelPredictedLabelConfig(probability_threshold=0.8)