Example #1
def main(resources):

    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # remove resources created by deploymodel.py and setup_mq.py
    if 'monitor' in resources and 'schedule_name' in resources['monitor']:
        print("Removing Model Quality Schedule")
        delete_schedule(resources['monitor']['schedule_name'], sm_client)

    if 'endpoint' in resources:
        predictor = SKLearnPredictor(resources['endpoint']['name'],
                                     sagemaker_session=sm_session)
        print("Removing model from registry")
        predictor.delete_model()
        print("Removing endpoint")
        predictor.delete_endpoint(delete_endpoint_config=True)

    print("None of the S3 resources were deleted !!!")
Example #2
def main(deploy_data, train_data):
    inference_id_prefix = 'sts_'  # Comes from deploymodel.py
    outputs = {'inferences': []}

    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # Load a predictor using the endpoint name
    predictor = SKLearnPredictor(
        deploy_data['endpoint']['name'],
        sagemaker_session=sm_session,
        serializer=CSVSerializer(),  # necessary, or the Model Quality Monitor won't work
        deserializer=CSVDeserializer()  # necessary, or the Model Quality Monitor won't work
    )

    # read test data
    test_data = load_dataset(train_data['train']['test'],
                             'test.csv',
                             sagemaker_session=sm_session)
    print(f"Loadding {train_data['train']['test']}")

    # remove labels in the test dataset
    test_data.drop(test_data.columns[0], axis=1, inplace=True)

    # Iterate over the test data and call the endpoint for each row
    x_test_rows = test_data.values
    print(f"Sending trafic to the endpoint: {deploy_data['endpoint']['name']}")
    with progressbar.ProgressBar(max_value=len(x_test_rows)) as bar:
        for index, x_test_row in enumerate(x_test_rows, start=1):
            # Auto-generate an inference-id to track the request/response
            # in the captured data
            inference_id = '{}{}'.format(inference_id_prefix, index)

            result = predictor.predict(x_test_row, inference_id=inference_id)

            outputs['inferences'].append({
                inference_id: {
                    'input': x_test_row.tolist(),
                    'result': result
                }
            })

            # show progress
            bar.update(index)

    with open('testendpoint_out.json', 'w') as f:
        json.dump(outputs, f)
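
The load_dataset() helper used above is also defined elsewhere. A minimal sketch, assuming it simply downloads a CSV from the given S3 prefix into a pandas DataFrame with no header row (the examples address columns by position); the project's actual helper may behave differently.

import io

import pandas as pd
from sagemaker.s3 import S3Downloader


def load_dataset(s3_uri, filename, sagemaker_session=None):
    """Hypothetical sketch: read <s3_uri>/<filename> into a DataFrame."""
    content = S3Downloader.read_file(
        "{}/{}".format(s3_uri, filename),
        sagemaker_session=sagemaker_session)
    return pd.read_csv(io.StringIO(content), header=None)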
Example #3
def main(resources, train_data):

    # configuration
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())
    outputs = resources

    bucket = sm_session.default_bucket()
    prefix = "{}/{}".format(BASE_JOB_PREFIX, resources['endpoint']['name'])
    if 'monitor' not in resources:
        raise ValueError("Monitoring not enabled")

    if 's3_capture_upload_path' not in resources['monitor']:
        raise ValueError("Monitoring not enabled")

    baseline_prefix = prefix + "/baselining"
    baseline_data_prefix = baseline_prefix + "/data"
    baseline_results_prefix = baseline_prefix + "/results"
    baseline_data_uri = "s3://{}/{}".format(bucket, baseline_data_prefix)
    baseline_results_uri = "s3://{}/{}".format(bucket, baseline_results_prefix)
    outputs['monitor'].update({
        'baseline': {
            'data_uri': baseline_data_uri,
            'results_uri': baseline_results_uri
        }
    })
    _l.info("Baseline data uri: {}".format(baseline_data_uri))
    _l.info("Baseline results uri: {}".format(baseline_results_uri))

    ground_truth_upload_path = f"s3://{bucket}/{prefix}/ground_truth_data"
    _l.info(f"Ground truth uri: {ground_truth_upload_path}")
    outputs['monitor'].update({'ground truth uri': ground_truth_upload_path})

    # Create a baselining job with training dataset
    _l.info("Executing a baselining job with training dataset")
    _l.info(f"baseline_data_uri: {train_data['baseline']['validate']}")
    my_monitor = ModelQualityMonitor(
        role=ROLE_ARN,
        sagemaker_session=sm_session,
        max_runtime_in_seconds=1800  # 30 minutes
    )
    my_monitor.suggest_baseline(
        baseline_dataset=train_data['baseline']['validate'] + "/baseline.csv",
        dataset_format=DatasetFormat.csv(header=True),
        problem_type="Regression",
        inference_attribute="prediction",
        ground_truth_attribute="label",
        output_s3_uri=baseline_results_uri,
        wait=True)
    baseline_job = my_monitor.latest_baselining_job
    _l.info("suggested baseline contrains")
    _l.info(
        pprint.pformat(baseline_job.suggested_constraints().
                       body_dict["regression_constraints"]))
    _l.info("suggested baseline statistics")
    _l.info(
        pprint.pformat(baseline_job.baseline_statistics().
                       body_dict["regression_metrics"]))

    monitor_schedule_name = (
        f"{BASE_JOB_PREFIX}-mq-sch-{datetime.datetime.utcnow():%Y-%m-%d-%H%M}")
    _l.info(f"Monitoring schedule name: {monitor_schedule_name}")
    outputs['monitor'].update({'schedule_name': monitor_schedule_name})
    endpointInput = EndpointInput(
        resources['endpoint']['name'],
        "/opt/ml/processing/input_data",
        inference_attribute='0'  # REVIEW:
    )

    my_monitor.create_monitoring_schedule(
        monitor_schedule_name=monitor_schedule_name,
        endpoint_input=endpointInput,
        output_s3_uri=baseline_results_uri,
        problem_type="Regression",
        ground_truth_input=ground_truth_upload_path,
        constraints=baseline_job.suggested_constraints(),
        # run the scheduler hourly
        schedule_cron_expression=CronExpressionGenerator.hourly(),
        enable_cloudwatch_metrics=True,
    )
    mq_schedule_details = my_monitor.describe_schedule()
    while mq_schedule_details['MonitoringScheduleStatus'] == 'Pending':
        _l.info(f'Waiting for {monitor_schedule_name}')
        time.sleep(3)
        mq_schedule_details = my_monitor.describe_schedule()
    _l.debug(
        f"Model Quality Monitor - schedule details: {pprint.pformat(mq_schedule_details)}"
    )
    _l.info(
        f"Model Quality Monitor - schedule status: {mq_schedule_details['MonitoringScheduleStatus']}"
    )

    # save outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
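
The json.dump() call above passes default=json_default so that values the standard encoder rejects, for example the datetime objects returned by the SageMaker APIs, can still be written out. A minimal sketch of such a helper, assuming datetimes are the only special case it has to handle:

import datetime


def json_default(obj):
    """Hypothetical sketch: fallback serializer for json.dump."""
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(obj).__name__))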
Example #4
def main(datacapture=False):
    # Load config from environment and set required defaults
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
    )
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    MODEL_PACKAGE_GROUP_NAME = os.getenv(
        'MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    # define useful constants
    bucket = sm_session.default_bucket()
    endpoint_name = "{}-sklearn-{}".format(
        BASE_JOB_PREFIX,
        datetime.datetime.now().strftime("%Y%m%d%H%M")
    )
    prefix = "{}/{}".format(BASE_JOB_PREFIX, endpoint_name)
    data_capture_prefix = "{}/datacapture".format(prefix)
    s3_capture_upload_path = "s3://{}/{}".format(bucket, data_capture_prefix)
    # outputs is a dict to save to json
    outputs = dict()

    if datacapture is True:
        # if data capture was enabled, output the S3 URI for the captured data
        outputs['monitor'] = {
            's3_capture_upload_path': s3_capture_upload_path
        }

    # get the latest approved version in the model package group
    model_package_arn = get_approved_package(
        MODEL_PACKAGE_GROUP_NAME, sm_client)
    _l.info(f"Latest approved model package: {model_package_arn}")
    model_info = sm_client.describe_model_package(
        ModelPackageName=model_package_arn)
    outputs['model_info'] = model_info
    model_uri = model_info.get(
        'InferenceSpecification')['Containers'][0]['ModelDataUrl']
    _l.info(f"Model data uri: {model_uri}")

    sk_model = SKLearnModel(
        model_uri,  # s3 uri for the model.tar.gz
        ROLE_ARN,   # sagemaker role to be used
        'model_loader.py',  # script to load the model
        framework_version='0.23-1'
    )

    data_capture_config = None
    if datacapture is True:
        # if data capture was enabled, generate the required config
        _l.info("Enabling data capture as requested")
        _l.info(f"s3_capture_upload_path: {s3_capture_upload_path}")
        data_capture_config = DataCaptureConfig(
            enable_capture=True, sampling_percentage=100, 
            destination_s3_uri=s3_capture_upload_path,
            capture_options=["REQUEST", "RESPONSE"],
            sagemaker_session=sm_session
        )

    # Deploy the endpoint
    predictor = sk_model.deploy(
        instance_type="ml.m5.xlarge", 
        initial_instance_count=1,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        data_capture_config=data_capture_config,
        endpoint_name=endpoint_name
    )

    _l.info(f"Endpoint name: {predictor.endpoint_name}")
    outputs['endpoint'] = {
        'name': endpoint_name,
        'config_name': predictor.endpoint_name  # is this the same as the endpoint name?
    }
    outputs['model_info'].update({"name": sk_model.name})
    # ENDPOINT deploy done

    # save useful outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
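
This example fetches the newest approved model package with get_approved_package(). A minimal sketch, assuming a single list_model_packages call is enough (the project's real helper may add pagination or extra error handling):

def get_approved_package(model_package_group_name, sm_client):
    """Hypothetical sketch: return the ARN of the latest approved
    package in the given model package group."""
    response = sm_client.list_model_packages(
        ModelPackageGroupName=model_package_group_name,
        ModelApprovalStatus='Approved',
        SortBy='CreationTime',
        SortOrder='Descending',
        MaxResults=1)
    packages = response['ModelPackageSummaryList']
    if not packages:
        raise ValueError(
            "No approved packages found in {}".format(model_package_group_name))
    return packages[0]['ModelPackageArn']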
Example #5
def main(deploy_data: dict, train_data: dict, capture_prefix: str):
    inference_id_prefix = 'sts_'  # the same prefix used in testendpoint.py

    # Load config from environment and set required defaults
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # read test data
    test_data = load_dataset(train_data['train']['test'],
                             'test.csv',
                             sagemaker_session=sm_session)
    print(f"Loadding {train_data['train']['test']}")
    Y_val = test_data.iloc[:, 0].to_numpy()
    print(f"Test dataset shape: {Y_val.shape}")

    # list the capture files stored under the endpoint's capture path
    capture_files = sorted(
        S3Downloader.list("{}/{}".format(
            deploy_data['monitor']['s3_capture_upload_path'],
            deploy_data['endpoint']['name']),
                          sagemaker_session=sm_session))
    # just the files with the prefix
    filtered = list(
        filter(lambda file_name: capture_prefix in file_name, capture_files))
    print(f"Detected {len(filtered)} capture files")

    capture_records = []
    for c_file in filtered:
        print(f"Processing: {c_file}")
        # read the capture data directly from S3
        content = S3Downloader.read_file(c_file, sagemaker_session=sm_session)
        records = [json.loads(line) for line in content.split("\n")[:-1]]

        capture_records.extend(records)

    print(f"No. of records {len(capture_records)} captured")
    captured_predictions = {}

    for obj in capture_records:
        # Extract inference ID
        inference_id = obj["eventMetadata"]["inferenceId"]
        # the current version of testendpoint.py starts the inference ids at 1;
        # remove the prefix and keep the numeric id
        req_id = int(inference_id[len(inference_id_prefix):])

        # Extract result given by the model
        Y_pred_value = encoders.decode(
            obj["captureData"]["endpointOutput"]["data"],
            # the content type is hard-coded here because
            # obj["captureData"]["endpointOutput"]["observedContentType"]
            # sometimes includes the encoding (e.g. text/csv; utf-8),
            # which makes encoders.decode() raise an error.
            content_types.CSV)
        captured_predictions[req_id] = Y_pred_value  # np.array

    # save and upload the ground truth labels
    print("Generating labels")
    fake_records = []
    for i, label in captured_predictions.items():
        val = ground_truth_with_id(i, label, Y_val, inference_id_prefix)
        fake_records.append(json.dumps(val))

    data_to_upload = "\n".join(fake_records)
    target_s3_uri = "{}/{}/{}.jsonl".format(
        deploy_data['monitor']['ground truth uri'], capture_prefix,
        uuid.uuid4().hex)
    print(f"Uploading ground truth to {target_s3_uri} ...", end="")
    S3Uploader.upload_string_as_file_body(data_to_upload,
                                          target_s3_uri,
                                          sagemaker_session=sm_session)
    print("Done !")
Example #6
def main():
    # define some configurations from env

    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', None)
    AWS_PROFILE = os.getenv('AWS_PROFILE', None)
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    # MLOps specific
    PIPELINE_NAME = os.getenv('PIPELINE_NAME', 'stsPipeline')
    MODEL_PACKAGE_GROUP_NAME = os.getenv('MODEL_PACKAGE_GROUP_NAME',
                                         'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    outputs = {'pipeline': None, 'baseline': None, 'train': None}

    try:
        # define the ml pipeline for training
        pipe = get_pipeline(AWS_DEFAULT_REGION,
                            sm_session,
                            role=ROLE_ARN,
                            pipeline_name=PIPELINE_NAME,
                            model_package_group_name=MODEL_PACKAGE_GROUP_NAME,
                            base_job_prefix=BASE_JOB_PREFIX)

        # output debug information
        parsed = json.loads(pipe.definition())
        outputs['pipeline'] = parsed
        _l.debug('ML Pipeline definition')
        _l.debug(json.dumps(parsed, indent=2, sort_keys=True))

        # Created/Updated SageMaker Pipeline
        upsert_response = pipe.upsert(role_arn=ROLE_ARN)
        _l.debug(
            f"C/U SageMaker Pipeline: Response received: {upsert_response}")

        _l.info("Starting the SageMaker pipeline")
        execution = pipe.start()
        _l.info("Waiting for the pipeline")
        execution.wait()

        _l.info("Pipeline finished: !!!")
        _l.debug(f"{pprint.pformat(execution.list_steps())}")

        # Take the S3 URI of the baseline dataset (baseline.csv)
        mse_step = extract_step_from_list(parsed.get('Steps'),
                                          'CheckMSESTSEvaluation')
        mon_step = extract_step_from_list(
            mse_step.get('Arguments').get('IfSteps'), 'SetupMonitoringData')

        outputs['baseline'] = get_outputs(mon_step)
        # take the S3 URIs of the train, validate, and test datasets
        train_step_def = extract_step_from_list(parsed.get('Steps'),
                                                'PreprocessSTSData')
        outputs['train'] = get_outputs(train_step_def)
        # --

        # write the pipeline definition and the selected outputs
        # to a JSON file
        with open('trainmodel_out.json', 'w') as f:
            json.dump(outputs, f)
        # ---
    except Exception as e:
        _l.exception(f"Exception: {e}")
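
extract_step_from_list() and get_outputs() are small helpers over the parsed pipeline definition. Hedged sketches below, assuming each step carries a 'Name' key and that the S3 outputs sit in the step's ProcessingOutputConfig; the project's real helpers may traverse the definition differently.

def extract_step_from_list(steps, step_name):
    """Hypothetical sketch: return the step definition named step_name."""
    for step in steps or []:
        if step.get('Name') == step_name:
            return step
    return None


def get_outputs(step):
    """Hypothetical sketch: map output names to their S3 URIs for a
    processing step definition."""
    outputs = step.get('Arguments', {}).get(
        'ProcessingOutputConfig', {}).get('Outputs', [])
    return {
        out['OutputName']: out['S3Output']['S3Uri']
        for out in outputs
    }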