def main(resources):
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # remove resources created by deploymodel.py and setup_mq.py
    if 'monitor' in resources and 'schedule_name' in resources['monitor']:
        print("Removing Model Quality Schedule")
        delete_schedule(resources['monitor']['schedule_name'], sm_client)

    if 'endpoint' in resources:
        predictor = SKLearnPredictor(
            resources['endpoint']['name'], sagemaker_session=sm_session)
        print("Removing model from registry")
        predictor.delete_model()
        print("Removing endpoint")
        predictor.delete_endpoint(delete_endpoint_config=True)

    print("None of the S3 resources were deleted !!!")
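# Hedged sketch (assumption, not part of the original scripts): the helpers
# get_sm_session() and delete_schedule() used above are not defined in this
# section. Minimal versions could look like the following, built only on
# boto3 and the sagemaker SDK.
import boto3
import sagemaker


def get_sm_session(region=None, profile_name=None,
                   aws_access_key_id=None, aws_secret_access_key=None):
    """Build a boto3 session plus the SageMaker clients used by the scripts."""
    b3_session = boto3.Session(
        region_name=region,
        profile_name=profile_name,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)
    sm_client = b3_session.client('sagemaker')
    sm_runtime = b3_session.client('sagemaker-runtime')
    sm_session = sagemaker.Session(
        boto_session=b3_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime)
    return b3_session, sm_client, sm_runtime, sm_session


def delete_schedule(schedule_name, sm_client):
    """Delete a model quality monitoring schedule by name."""
    sm_client.delete_monitoring_schedule(MonitoringScheduleName=schedule_name)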
def main(deploy_data, train_data):
    inference_id_prefix = 'sts_'  # Comes from deploymodel.py
    outputs = {'inferences': []}

    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # Load a predictor using the endpoint name
    predictor = SKLearnPredictor(
        deploy_data['endpoint']['name'],
        sagemaker_session=sm_session,
        serializer=CSVSerializer(),     # necessary or model quality monitoring won't work
        deserializer=CSVDeserializer()  # necessary or model quality monitoring won't work
    )

    # read test data
    test_data = load_dataset(
        train_data['train']['test'], 'test.csv', sagemaker_session=sm_session)
    print(f"Loading {train_data['train']['test']}")

    # remove labels in the test dataset
    test_data.drop(test_data.columns[0], axis=1, inplace=True)

    # Iterate over the test data and call the endpoint for each row
    x_test_rows = test_data.values
    print(f"Sending traffic to the endpoint: {deploy_data['endpoint']['name']}")
    with progressbar.ProgressBar(max_value=len(x_test_rows)) as bar:
        for index, x_test_row in enumerate(x_test_rows, start=1):
            # Auto-generate an inference-id to track the request/response
            # in the captured data
            inference_id = '{}{}'.format(inference_id_prefix, index)
            result = predictor.predict(x_test_row, inference_id=inference_id)
            outputs['inferences'].append({
                inference_id: {
                    'input': x_test_row.tolist(),
                    'result': result
                }
            })
            # show progress
            bar.update(index)

    with open('testendpoint_out.json', 'w') as f:
        json.dump(outputs, f)
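# Hedged sketch (assumption): load_dataset() is not shown in this section.
# Given how it is called above, it plausibly downloads <uri>/<filename> from
# S3 and returns a pandas DataFrame with the label in the first column; the
# header=None choice below is an assumption.
import os
import tempfile

import pandas as pd
from sagemaker.s3 import S3Downloader


def load_dataset(dataset_uri, filename, sagemaker_session=None):
    """Download a CSV from S3 and load it into a pandas DataFrame."""
    local_dir = tempfile.mkdtemp()
    S3Downloader.download(
        "{}/{}".format(dataset_uri, filename), local_dir,
        sagemaker_session=sagemaker_session)
    return pd.read_csv(os.path.join(local_dir, filename), header=None)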
def main(resources, train_data):
    # configuration
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    outputs = resources
    bucket = sm_session.default_bucket()
    prefix = "{}/{}".format(BASE_JOB_PREFIX, resources['endpoint']['name'])

    if 'monitor' not in resources:
        raise ValueError("Monitoring not enabled")
    if 's3_capture_upload_path' not in resources['monitor']:
        raise ValueError("Monitoring not enabled")

    baseline_prefix = prefix + "/baselining"
    baseline_data_prefix = baseline_prefix + "/data"
    baseline_results_prefix = baseline_prefix + "/results"
    baseline_data_uri = "s3://{}/{}".format(bucket, baseline_data_prefix)
    baseline_results_uri = "s3://{}/{}".format(bucket, baseline_results_prefix)
    outputs['monitor'].update({
        'baseline': {
            'data_uri': baseline_data_uri,
            'results_uri': baseline_results_uri
        }
    })
    _l.info("Baseline data uri: {}".format(baseline_data_uri))
    _l.info("Baseline results uri: {}".format(baseline_results_uri))

    ground_truth_upload_path = f"s3://{bucket}/{prefix}/ground_truth_data"
    _l.info(f"Ground truth uri: {ground_truth_upload_path}")
    outputs['monitor'].update({'ground truth uri': ground_truth_upload_path})

    # Create a baselining job with the training dataset
    _l.info("Executing a baselining job with the training dataset")
    _l.info(f"baseline_data_uri: {train_data['baseline']['validate']}")
    my_monitor = ModelQualityMonitor(
        role=ROLE_ARN,
        sagemaker_session=sm_session,
        max_runtime_in_seconds=1800  # 30 minutes
    )
    my_monitor.suggest_baseline(
        baseline_dataset=train_data['baseline']['validate'] + "/baseline.csv",
        dataset_format=DatasetFormat.csv(header=True),
        problem_type="Regression",
        inference_attribute="prediction",
        ground_truth_attribute="label",
        output_s3_uri=baseline_results_uri,
        wait=True)
    baseline_job = my_monitor.latest_baselining_job
    _l.info("Suggested baseline constraints")
    _l.info(pprint.pformat(
        baseline_job.suggested_constraints().body_dict["regression_constraints"]))
    _l.info("Suggested baseline statistics")
    _l.info(pprint.pformat(
        baseline_job.baseline_statistics().body_dict["regression_metrics"]))

    monitor_schedule_name = (
        f"{BASE_JOB_PREFIX}-mq-sch-{datetime.datetime.utcnow():%Y-%m-%d-%H%M}")
    _l.info(f"Monitoring schedule name: {monitor_schedule_name}")
    outputs['monitor'].update({'schedule_name': monitor_schedule_name})

    endpointInput = EndpointInput(
        resources['endpoint']['name'],
        "/opt/ml/processing/input_data",
        inference_attribute='0'  # REVIEW:
    )
    my_monitor.create_monitoring_schedule(
        monitor_schedule_name=monitor_schedule_name,
        endpoint_input=endpointInput,
        output_s3_uri=baseline_results_uri,
        problem_type="Regression",
        ground_truth_input=ground_truth_upload_path,
        constraints=baseline_job.suggested_constraints(),
        # run the schedule hourly
        schedule_cron_expression=CronExpressionGenerator.hourly(),
        enable_cloudwatch_metrics=True,
    )
    mq_schedule_details = my_monitor.describe_schedule()
    while mq_schedule_details['MonitoringScheduleStatus'] == 'Pending':
        _l.info(f'Waiting for {monitor_schedule_name}')
        time.sleep(3)
        mq_schedule_details = my_monitor.describe_schedule()
    _l.debug(
        f"Model Quality Monitor - schedule details: {pprint.pformat(mq_schedule_details)}")
    _l.info(
        f"Model Quality Monitor - schedule status: "
        f"{mq_schedule_details['MonitoringScheduleStatus']}")

    # save outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
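# Hedged sketch (assumption): json_default() is passed as the json.dump
# fallback above and below but is not defined in this section. Something like
# this is enough to serialize the datetime values that appear in SageMaker
# API responses such as describe_model_package().
import datetime


def json_default(obj):
    """Fallback serializer for objects json.dump cannot handle natively."""
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    return str(obj)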
def main(datacapture=False):
    # Load config from environment and set required defaults
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
    )
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())
    MODEL_PACKAGE_GROUP_NAME = os.getenv(
        'MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    # define useful constants
    bucket = sm_session.default_bucket()
    endpoint_name = "{}-sklearn-{}".format(
        BASE_JOB_PREFIX, datetime.datetime.now().strftime("%Y%m%d%H%M")
    )
    prefix = "{}/{}".format(BASE_JOB_PREFIX, endpoint_name)
    data_capture_prefix = "{}/datacapture".format(prefix)
    s3_capture_upload_path = "s3://{}/{}".format(bucket, data_capture_prefix)

    # outputs is a dict to save to json
    outputs = dict()

    if datacapture is True:
        # if data capture was enabled, output the S3 URI for data capture
        outputs['monitor'] = {
            's3_capture_upload_path': s3_capture_upload_path
        }

    # get the latest approved version in the model package group
    model_package_arn = get_approved_package(
        MODEL_PACKAGE_GROUP_NAME, sm_client)
    _l.info(f"Latest approved model package: {model_package_arn}")
    model_info = sm_client.describe_model_package(
        ModelPackageName=model_package_arn)
    outputs['model_info'] = model_info
    model_uri = model_info.get(
        'InferenceSpecification')['Containers'][0]['ModelDataUrl']
    _l.info(f"Model data uri: {model_uri}")

    sk_model = SKLearnModel(
        model_uri,           # s3 uri for the model.tar.gz
        ROLE_ARN,            # sagemaker role to be used
        'model_loader.py',   # script to load the model
        framework_version='0.23-1'
    )

    data_capture_config = None
    if datacapture is True:
        # if data capture was enabled, generate the required config
        _l.info("Enabling data capture as requested")
        _l.info(f"s3_capture_upload_path: {s3_capture_upload_path}")
        data_capture_config = DataCaptureConfig(
            enable_capture=True,
            sampling_percentage=100,
            destination_s3_uri=s3_capture_upload_path,
            capture_options=["REQUEST", "RESPONSE"],
            sagemaker_session=sm_session
        )

    # Deploy the endpoint
    predictor = sk_model.deploy(
        instance_type="ml.m5.xlarge",
        initial_instance_count=1,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        data_capture_config=data_capture_config,
        endpoint_name=endpoint_name
    )
    _l.info(f"Endpoint name: {predictor.endpoint_name}")
    outputs['endpoint'] = {
        'name': endpoint_name,
        'config_name': predictor.endpoint_name  # is this the same as the endpoint name?
    }
    outputs['model_info'].update({"name": sk_model.name})
    # ENDPOINT deploy done

    # save useful outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
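# Hedged sketch (assumption): get_approved_package() is not defined in this
# section. A minimal version could query the model package group and return
# the ARN of the most recently created package whose approval status is
# "Approved".
def get_approved_package(model_package_group_name, sm_client):
    """Return the ARN of the latest approved package in the group."""
    response = sm_client.list_model_packages(
        ModelPackageGroupName=model_package_group_name,
        ModelApprovalStatus='Approved',
        SortBy='CreationTime',
        SortOrder='Descending',
        MaxResults=1)
    packages = response['ModelPackageSummaryList']
    if not packages:
        raise ValueError(
            f"No approved packages found in {model_package_group_name}")
    return packages[0]['ModelPackageArn']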
def main(deploy_data: dict, train_data: dict, capture_prefix: str):
    inference_id_prefix = 'sts_'  # the same prefix used in testendpoint.py

    # Load config from environment and set required defaults
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    # read test data
    test_data = load_dataset(
        train_data['train']['test'], 'test.csv', sagemaker_session=sm_session)
    print(f"Loading {train_data['train']['test']}")
    Y_val = test_data.iloc[:, 0].to_numpy()
    print(f"Test dataset shape: {Y_val.shape}")

    # list the capture files for the endpoint
    capture_files = sorted(
        S3Downloader.list(
            "{}/{}".format(
                deploy_data['monitor']['s3_capture_upload_path'],
                deploy_data['endpoint']['name']),
            sagemaker_session=sm_session))
    # keep just the files with the requested prefix
    filtered = list(
        filter(lambda file_name: capture_prefix in file_name, capture_files))
    print(f"Detected {len(filtered)} capture files")

    capture_records = []
    for c_file in filtered:
        print(f"Processing: {c_file}")
        # read the capture data directly from S3
        content = S3Downloader.read_file(c_file, sagemaker_session=sm_session)
        records = [json.loads(l) for l in content.split("\n")[:-1]]
        capture_records.extend(records)
    print(f"No. of records captured: {len(capture_records)}")

    captured_predictions = {}
    for obj in capture_records:
        # Extract the inference ID
        inference_id = obj["eventMetadata"]["inferenceId"]
        # the current version of the script starts the IDs at 1;
        # remove the prefix and get the numeric id
        req_id = int(inference_id[len(inference_id_prefix):])
        # Extract the result given by the model
        Y_pred_value = encoders.decode(
            obj["captureData"]["endpointOutput"]["data"],
            # the content type is fixed here because
            # obj["captureData"]["endpointOutput"]["observedContentType"]
            # sometimes includes the encoding, e.g. "text/csv; utf-8",
            # and encoders.decode() would raise an error.
            content_types.CSV)
        captured_predictions[req_id] = Y_pred_value  # np.array

    # save and upload the ground truth labels
    print("Generating labels")
    fake_records = []
    for i, label in captured_predictions.items():
        val = ground_truth_with_id(i, label, Y_val, inference_id_prefix)
        fake_records.append(json.dumps(val))

    data_to_upload = "\n".join(fake_records)
    target_s3_uri = "{}/{}/{}.jsonl".format(
        deploy_data['monitor']['ground truth uri'],
        capture_prefix,
        uuid.uuid4().hex)
    print(f"Uploading ground truth to {target_s3_uri} ...", end="")
    S3Uploader.upload_string_as_file_body(
        data_to_upload, target_s3_uri, sagemaker_session=sm_session)
    print("Done !")
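# Hedged sketch (assumption): ground_truth_with_id() is not shown here. Based
# on how its output is serialized to JSON Lines and uploaded to the ground
# truth prefix, it plausibly builds one record in the format the model
# quality merge job expects, keyed by the original inference id. The exact
# label lookup (y_val[record_id - 1]) is an assumption tied to the 1-based
# ids generated in testendpoint.py.
def ground_truth_with_id(record_id, prediction, y_val, inference_id_prefix):
    """Build a ground-truth record matching a captured inference id."""
    return {
        "groundTruthData": {
            # real label from the test set for this record
            "data": str(y_val[record_id - 1]),
            "encoding": "CSV",
        },
        "eventMetadata": {
            # must match the inferenceId sent with the original request
            "eventId": "{}{}".format(inference_id_prefix, record_id),
        },
        "eventVersion": "0",
    }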
def main():
    # define some configurations from env
    # AWS specific
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', None)
    AWS_PROFILE = os.getenv('AWS_PROFILE', None)
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())

    # MLOps specific
    PIPELINE_NAME = os.getenv('PIPELINE_NAME', 'stsPipeline')
    MODEL_PACKAGE_GROUP_NAME = os.getenv(
        'MODEL_PACKAGE_GROUP_NAME', 'sts-sklearn-grp')
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')

    outputs = {'pipeline': None, 'baseline': None, 'train': None}

    try:
        # define the ml pipeline for training
        pipe = get_pipeline(
            AWS_DEFAULT_REGION,
            sm_session,
            role=ROLE_ARN,
            pipeline_name=PIPELINE_NAME,
            model_package_group_name=MODEL_PACKAGE_GROUP_NAME,
            base_job_prefix=BASE_JOB_PREFIX)

        # output debug information
        parsed = json.loads(pipe.definition())
        outputs['pipeline'] = parsed
        _l.debug('ML Pipeline definition')
        _l.debug(json.dumps(parsed, indent=2, sort_keys=True))

        # Create/Update the SageMaker Pipeline
        upsert_response = pipe.upsert(role_arn=ROLE_ARN)
        _l.debug(
            f"C/U SageMaker Pipeline: Response received: {upsert_response}")

        _l.info("Starting the SageMaker pipeline")
        execution = pipe.start()
        _l.info("Waiting for the pipeline")
        execution.wait()
        _l.info("Pipeline finished !!!")
        _l.debug(f"{pprint.pformat(execution.list_steps())}")

        # Take the s3 uri of the baseline dataset baseline.csv
        mse_step = extract_step_from_list(
            parsed.get('Steps'), 'CheckMSESTSEvaluation')
        mon_step = extract_step_from_list(
            mse_step.get('Arguments').get('IfSteps'), 'SetupMonitoringData')
        outputs['baseline'] = get_outputs(mon_step)

        # take the s3 uri of the train, validate, and test datasets
        train_step_def = extract_step_from_list(
            parsed.get('Steps'), 'PreprocessSTSData')
        outputs['train'] = get_outputs(train_step_def)
        # --

        # write the pipeline definition and the selected outputs to a json
        # file
        with open('trainmodel_out.json', 'w') as f:
            json.dump(outputs, f)
        # ---
    except Exception as e:
        _l.exception(f"Exception: {e}")
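# Hedged sketch (assumption): extract_step_from_list() and get_outputs() are
# not defined in this section. Given how they are applied to the parsed
# pipeline definition above, minimal versions could look like this; the exact
# keys depend on how the pipeline's processing steps declare their outputs.
def extract_step_from_list(steps, step_name):
    """Return the step definition whose Name matches step_name."""
    for step in steps or []:
        if step.get('Name') == step_name:
            return step
    return None


def get_outputs(step_definition):
    """Collect the S3 output URIs declared by a processing step."""
    outputs = {}
    processing_outputs = (
        step_definition.get('Arguments', {})
        .get('ProcessingOutputConfig', {})
        .get('Outputs', []))
    for out in processing_outputs:
        outputs[out.get('OutputName')] = out.get('S3Output', {}).get('S3Uri')
    return outputs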