Example #1
0
def test_default_model_monitor_suggest_baseline(sagemaker_session):
    """suggest_baseline on a fully configured DefaultModelMonitor leaves every
    constructor attribute intact and derives a distinct baselining job name
    from the configured base job name."""
    monitor = DefaultModelMonitor(
        role=ROLE,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        volume_size_in_gb=VOLUME_SIZE_IN_GB,
        volume_kms_key=VOLUME_KMS_KEY,
        output_kms_key=OUTPUT_KMS_KEY,
        max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
        base_job_name=BASE_JOB_NAME,
        sagemaker_session=sagemaker_session,
        env=ENVIRONMENT,
        tags=TAGS,
        network_config=NETWORK_CONFIG,
    )

    monitor.suggest_baseline(
        baseline_dataset=BASELINE_DATASET_PATH,
        dataset_format=DatasetFormat.csv(header=False),
        record_preprocessor_script=PREPROCESSOR_PATH,
        post_analytics_processor_script=POSTPROCESSOR_PATH,
        output_s3_uri=OUTPUT_S3_URI,
        wait=False,
        logs=False,
    )

    # Every constructor attribute must survive the baselining call unchanged.
    expected_attributes = {
        "role": ROLE,
        "instance_count": INSTANCE_COUNT,
        "instance_type": INSTANCE_TYPE,
        "volume_size_in_gb": VOLUME_SIZE_IN_GB,
        "volume_kms_key": VOLUME_KMS_KEY,
        "output_kms_key": OUTPUT_KMS_KEY,
        "max_runtime_in_seconds": MAX_RUNTIME_IN_SECONDS,
        "base_job_name": BASE_JOB_NAME,
        "sagemaker_session": sagemaker_session,
        "tags": TAGS,
        "network_config": NETWORK_CONFIG,
        "image_uri": DEFAULT_IMAGE_URI,
    }
    for attribute, value in expected_attributes.items():
        assert getattr(monitor, attribute) == value

    # The job name is derived from, but not equal to, the base job name.
    assert BASE_JOB_NAME in monitor.latest_baselining_job_name
    assert monitor.latest_baselining_job_name != BASE_JOB_NAME

    assert monitor.env[ENV_KEY_1] == ENV_VALUE_1
Example #2
0
def test_default_model_monitor_with_invalid_network_config(sagemaker_session):
    """Both schedule creation and schedule update must be rejected with a
    ValueError when inter-container traffic encryption is disabled."""
    monitor = DefaultModelMonitor(
        role=ROLE,
        sagemaker_session=sagemaker_session,
        network_config=NetworkConfig(encrypt_inter_container_traffic=False),
    )

    # Both entry points must surface the same encryption error.
    failing_calls = (
        lambda: monitor.create_monitoring_schedule(endpoint_input="test_endpoint"),
        lambda: monitor.update_monitoring_schedule(),
    )
    for invoke in failing_calls:
        with pytest.raises(ValueError) as exception:
            invoke()
        assert INTER_CONTAINER_ENCRYPTION_EXCEPTION_MSG in str(exception.value)
    def _generate_model_monitor(self, mm_type: str) -> Optional[ModelMonitor]:
        """Generates a ModelMonitor object

        Generates a ModelMonitor object with required config attributes for
            QualityCheckStep and ClarifyCheckStep

        Args:
            mm_type (str): The subclass type of ModelMonitor object.
                A valid mm_type should be one of the following: "DefaultModelMonitor",
                "ModelQualityMonitor", "ModelBiasMonitor", "ModelExplainabilityMonitor"

        Return:
            sagemaker.model_monitor.ModelMonitor or None if the mm_type is not valid

        """
        # All four supported subclasses take the exact same constructor
        # arguments, so dispatch on a name -> class table instead of
        # repeating the identical call four times.
        monitor_classes = {
            "DefaultModelMonitor": DefaultModelMonitor,
            "ModelQualityMonitor": ModelQualityMonitor,
            "ModelBiasMonitor": ModelBiasMonitor,
            "ModelExplainabilityMonitor": ModelExplainabilityMonitor,
        }
        monitor_cls = monitor_classes.get(mm_type)
        if monitor_cls is None:
            logging.warning(
                'Expected model monitor types: "DefaultModelMonitor", "ModelQualityMonitor", '
                '"ModelBiasMonitor", "ModelExplainabilityMonitor"')
            return None
        return monitor_cls(
            role=self.role,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            volume_size_in_gb=self.volume_size_in_gb,
            volume_kms_key=self.volume_kms_key,
            output_kms_key=self.output_kms_key,
            max_runtime_in_seconds=self.max_runtime_in_seconds,
            base_job_name=self.base_job_name,
            sagemaker_session=self.sagemaker_session,
            env=self.env,
            tags=self.tags,
            network_config=self.network_config,
        )
Example #4
0
# Derive the training-job name from the most recent CodePipeline execution
# so the job can be traced back to the pipeline run that triggered it.
codepipeline = boto3.client('codepipeline')
response = codepipeline.get_pipeline_state(name=stack_name)
execution_id = response['stageStates'][0]['latestExecution']['pipelineExecutionId']
job_name = name_from_base(execution_id)
# Fixed typo in the user-facing message: "Staring" -> "Starting".
print('Starting Training job: {}'.format(job_name))

# S3 locations for the baselining input data and the computed statistics.
baseline_data_path = 's3://{0}/{1}/monitoring/baselining/data'.format(bucket_name, prefix)
baseline_results_path = 's3://{0}/{1}/monitoring/baselining/results'.format(bucket_name, prefix)

print(baseline_data_path)
print(baseline_results_path)

my_default_monitor = DefaultModelMonitor(
    role=execution_role,
    instance_count=1,
    instance_type='ml.c5.4xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

# Compute baseline statistics/constraints from the CSV dataset (with header).
my_default_monitor.suggest_baseline(
    job_name=job_name,
    baseline_dataset=baseline_data_path,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_path,
    logs=False,  # Disable to avoid noisy logging, only meaningful when wait=True
    wait=True,
)

# save environment variables
Example #5
0
# Now it is time to apply the new configuration
predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=sagemaker_session)
predictor.update_data_capture_config(data_capture_config=data_capture_config)

print('Created Predictor at endpoint {}'.format(endpoint_name))

# Where the validation (baseline) data lives, and where the baselining
# results will be written.
baseline_data_uri = args.baseline_data_uri  # e.g. 's3://bucketname/path/to/baseline/data'
baseline_results_uri = f's3://{bucket}/{prefix}/baseline/results'  # where the results are stored

print('Baseline data is at {}'.format(baseline_data_uri))

my_default_monitor = DefaultModelMonitor(
    role=get_execution_role(sagemaker_session=sagemaker_session),
    sagemaker_session=sagemaker_session,
    instance_count=2,
    instance_type='ml.m5.4xlarge',
    volume_size_in_gb=60,
    max_runtime_in_seconds=1800,
)


# Compute baseline statistics/constraints from the headerless CSV dataset.
my_default_monitor.suggest_baseline(
    baseline_dataset=baseline_data_uri,
    dataset_format=DatasetFormat.csv(header=False),
    output_s3_uri=baseline_results_uri,
    wait=True
)

print('Model data baseline suggested at {}'.format(baseline_results_uri))

# Dropped the redundant self-alias ("import datetime as datetime").
import datetime
Example #6
0
    print("\n ".join(capture_files))

    def get_obj_body(obj_key):
        return s3_client.get_object(Bucket=bucket, Key=obj_key).get('Body').read().decode("utf-8")

    capture_file = get_obj_body(capture_files[-1])
    print(capture_file[:2000])

    print(json.dumps(json.loads(capture_file.split('\n')[0]), indent=2))

# Resolve the execution role used for the monitoring jobs below.
role = get_sm_execution_role(ON_SAGEMAKER_NOTEBOOK, boto_sess.region_name)

# Data-quality monitor used by create_baseline() for the baselining job.
my_default_monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)
def create_baseline():
    """Run a baselining job over the configured dataset.

    Relies on the module-level ``my_default_monitor`` and the
    ``baseline_data_uri`` / ``baseline_results_uri`` globals.
    """
    print(f'Baseline data uri: {baseline_data_uri}')
    print(f'Baseline results uri: {baseline_results_uri}')

    baseline_kwargs = dict(
        baseline_dataset=baseline_data_uri,
        dataset_format=DatasetFormat.csv(header=False),
        output_s3_uri=baseline_results_uri,
        wait=True,
    )
    my_default_monitor.suggest_baseline(**baseline_kwargs)

mon_schedule_name = 'xgb-boston-pred-model-monitor-schedule-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    destination_s3_uri=s3_capture_upload_path,
    kms_key_id=None,
    capture_options=["REQUEST", "RESPONSE"],
    csv_content_types=["text/csv"],
    json_content_types=["application/json"],
)

# Now it is time to apply the new configuration and wait for it to be applied
predictor = RealTimePredictor(endpoint=endpoint_name)
predictor.update_data_capture_config(data_capture_config=data_capture_config)
sm_session.wait_for_endpoint(endpoint=endpoint_name)

# Monitor sized via Terraform template variables substituted at render time.
my_default_monitor = DefaultModelMonitor(
    role=role,
    instance_count="${var.training_job_instance_count}",
    instance_type="${var.training_job_instance_type}",
    # NOTE(review): volume_size_in_gb interpolates the *instance type*
    # variable — almost certainly a copy-paste bug; it should reference a
    # volume-size variable (and SageMaker expects an integer here).
    # Confirm the intended Terraform variable before changing it.
    volume_size_in_gb="${var.training_job_instance_type}",
    max_runtime_in_seconds="${var.data_drift_job_timeout_in_sec}",
)

# now ask Sagemaker to suggest baseline stats
my_default_monitor.suggest_baseline(
    baseline_dataset=baseline_data_uri + "/train.csv",
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    wait=True,
)

my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=predictor.endpoint,