def test_one_step_ingestion_pipeline( sagemaker_session, feature_store_session, feature_definitions, role, pipeline_name ): instance_count = ParameterInteger(name="InstanceCount", default_value=1) instance_type = ParameterString(name="InstanceType", default_value="ml.m5.4xlarge") input_name = "features.csv" input_file_path = os.path.join(DATA_DIR, "workflow", "features.csv") input_data_uri = os.path.join( "s3://", sagemaker_session.default_bucket(), "py-sdk-ingestion-test-input/features.csv", ) with open(input_file_path, "r") as data: body = data.read() S3Uploader.upload_string_as_file_body( body=body, desired_s3_uri=input_data_uri, sagemaker_session=sagemaker_session, ) inputs = [ ProcessingInput( input_name=input_name, source=input_data_uri, destination="/opt/ml/processing/features.csv", ) ] feature_group_name = f"py-sdk-integ-fg-{int(time.time() * 10**7)}" feature_group = FeatureGroup( name=feature_group_name, feature_definitions=feature_definitions, sagemaker_session=feature_store_session, ) ingestion_only_flow, output_name = generate_data_ingestion_flow_from_s3_input( input_name, input_data_uri, s3_content_type="csv", s3_has_header=True, ) outputs = [ ProcessingOutput( output_name=output_name, app_managed=True, feature_store_output=FeatureStoreOutput(feature_group_name=feature_group_name), ) ] output_content_type = "CSV" output_config = {output_name: {"content_type": output_content_type}} job_argument = [f"--output-config '{json.dumps(output_config)}'"] temp_flow_path = "./ingestion.flow" with cleanup_feature_group(feature_group): json.dump(ingestion_only_flow, open(temp_flow_path, "w")) data_wrangler_processor = DataWranglerProcessor( role=role, data_wrangler_flow_source=temp_flow_path, instance_count=instance_count, instance_type=instance_type, sagemaker_session=sagemaker_session, max_runtime_in_seconds=86400, ) data_wrangler_step = ProcessingStep( name="ingestion-step", processor=data_wrangler_processor, inputs=inputs, outputs=outputs, job_arguments=job_argument, ) pipeline = Pipeline( name=pipeline_name, parameters=[instance_count, instance_type], steps=[data_wrangler_step], sagemaker_session=sagemaker_session, ) try: response = pipeline.create(role) create_arn = response["PipelineArn"] offline_store_s3_uri = os.path.join( "s3://", sagemaker_session.default_bucket(), feature_group_name ) feature_group.create( s3_uri=offline_store_s3_uri, record_identifier_name="f11", event_time_feature_name="f10", role_arn=role, enable_online_store=False, ) _wait_for_feature_group_create(feature_group) execution = pipeline.start() response = execution.describe() assert response["PipelineArn"] == create_arn try: execution.wait(delay=60, max_attempts=10) except WaiterError: pass execution_steps = execution.list_steps() assert len(execution_steps) == 1 assert execution_steps[0]["StepName"] == "ingestion-step" assert execution_steps[0]["StepStatus"] == "Succeeded" athena_query = feature_group.athena_query() with timeout(minutes=10): athena_query.run( query_string=f'SELECT * FROM "{athena_query.table_name}"', output_location=f"{offline_store_s3_uri}/query_results", ) athena_query.wait() assert "SUCCEEDED" == athena_query.get_query_execution().get("QueryExecution").get( "Status" ).get("State") df = athena_query.as_dataframe() assert pd.read_csv(input_file_path).shape[0] == df.shape[0] finally: try: pipeline.delete() except Exception as e: print(f"Delete pipeline failed with error: {e}") os.remove(temp_flow_path)
def test_three_step_definition( sagemaker_session, region_name, role, script_dir, pipeline_name, athena_dataset_definition, ): framework_version = "0.20.0" instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge") instance_count = ParameterInteger(name="InstanceCount", default_value=1) output_prefix = ParameterString(name="OutputPrefix", default_value="output") input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv" sklearn_processor = SKLearnProcessor( framework_version=framework_version, instance_type=instance_type, instance_count=instance_count, base_job_name="test-sklearn", sagemaker_session=sagemaker_session, role=role, ) step_process = ProcessingStep( name="my-process", display_name="ProcessingStep", description="description for Processing step", processor=sklearn_processor, inputs=[ ProcessingInput(source=input_data, destination="/opt/ml/processing/input"), ProcessingInput(dataset_definition=athena_dataset_definition), ], outputs=[ ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"), ProcessingOutput( output_name="test_data", source="/opt/ml/processing/test", destination=Join( on="/", values=[ "s3:/", sagemaker_session.default_bucket(), "test-sklearn", output_prefix, ExecutionVariables.PIPELINE_EXECUTION_ID, ], ), ), ], code=os.path.join(script_dir, "preprocessing.py"), ) sklearn_train = SKLearn( framework_version=framework_version, entry_point=os.path.join(script_dir, "train.py"), instance_type=instance_type, sagemaker_session=sagemaker_session, role=role, ) step_train = TrainingStep( name="my-train", display_name="TrainingStep", description="description for Training step", estimator=sklearn_train, inputs=TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ "train_data" ].S3Output.S3Uri ), ) model = Model( image_uri=sklearn_train.image_uri, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, sagemaker_session=sagemaker_session, role=role, ) model_inputs = CreateModelInput( instance_type="ml.m5.large", accelerator_type="ml.eia1.medium", ) step_model = CreateModelStep( name="my-model", display_name="ModelStep", description="description for Model step", model=model, inputs=model_inputs, ) pipeline = Pipeline( name=pipeline_name, parameters=[instance_type, instance_count, output_prefix], steps=[step_process, step_train, step_model], sagemaker_session=sagemaker_session, ) definition = json.loads(pipeline.definition()) assert definition["Version"] == "2020-12-01" assert set(tuple(param.items()) for param in definition["Parameters"]) == set( [ tuple( { "Name": "InstanceType", "Type": "String", "DefaultValue": "ml.m5.xlarge", }.items() ), tuple({"Name": "InstanceCount", "Type": "Integer", "DefaultValue": 1}.items()), tuple( { "Name": "OutputPrefix", "Type": "String", "DefaultValue": "output", }.items() ), ] ) steps = definition["Steps"] assert len(steps) == 3 names_and_types = [] display_names_and_desc = [] processing_args = {} training_args = {} for step in steps: names_and_types.append((step["Name"], step["Type"])) display_names_and_desc.append((step["DisplayName"], step["Description"])) if step["Type"] == "Processing": processing_args = step["Arguments"] if step["Type"] == "Training": training_args = step["Arguments"] if step["Type"] == "Model": model_args = step["Arguments"] assert set(names_and_types) == set( [ ("my-process", "Processing"), ("my-train", "Training"), ("my-model", "Model"), ] ) assert set(display_names_and_desc) == set( [ ("ProcessingStep", 
"description for Processing step"), ("TrainingStep", "description for Training step"), ("ModelStep", "description for Model step"), ] ) assert processing_args["ProcessingResources"]["ClusterConfig"] == { "InstanceType": {"Get": "Parameters.InstanceType"}, "InstanceCount": {"Get": "Parameters.InstanceCount"}, "VolumeSizeInGB": 30, } assert training_args["ResourceConfig"] == { "InstanceCount": 1, "InstanceType": {"Get": "Parameters.InstanceType"}, "VolumeSizeInGB": 30, } assert training_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] == { "Get": "Steps.my-process.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri" } assert model_args["PrimaryContainer"]["ModelDataUrl"] == { "Get": "Steps.my-train.ModelArtifacts.S3ModelArtifacts" } try: response = pipeline.create(role) create_arn = response["PipelineArn"] assert re.match( rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn, ) finally: try: pipeline.delete() except Exception: pass
def test_steps_with_map_params_pipeline( sagemaker_session, role, script_dir, pipeline_name, region_name, athena_dataset_definition, ): instance_count = ParameterInteger(name="InstanceCount", default_value=2) framework_version = "0.20.0" instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge") output_prefix = ParameterString(name="OutputPrefix", default_value="output") input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv" sklearn_processor = SKLearnProcessor( framework_version=framework_version, instance_type=instance_type, instance_count=instance_count, base_job_name="test-sklearn", sagemaker_session=sagemaker_session, role=role, ) step_process = ProcessingStep( name="my-process", display_name="ProcessingStep", description="description for Processing step", processor=sklearn_processor, inputs=[ ProcessingInput(source=input_data, destination="/opt/ml/processing/input"), ProcessingInput(dataset_definition=athena_dataset_definition), ], outputs=[ ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"), ProcessingOutput( output_name="test_data", source="/opt/ml/processing/test", destination=Join( on="/", values=[ "s3:/", sagemaker_session.default_bucket(), "test-sklearn", output_prefix, ExecutionVariables.PIPELINE_EXECUTION_ID, ], ), ), ], code=os.path.join(script_dir, "preprocessing.py"), ) sklearn_train = SKLearn( framework_version=framework_version, entry_point=os.path.join(script_dir, "train.py"), instance_type=instance_type, sagemaker_session=sagemaker_session, role=role, hyperparameters={ "batch-size": 500, "epochs": 5, }, ) step_train = TrainingStep( name="my-train", display_name="TrainingStep", description="description for Training step", estimator=sklearn_train, inputs=TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ "train_data" ].S3Output.S3Uri ), ) model = Model( image_uri=sklearn_train.image_uri, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, sagemaker_session=sagemaker_session, role=role, ) model_inputs = CreateModelInput( instance_type="ml.m5.large", accelerator_type="ml.eia1.medium", ) step_model = CreateModelStep( name="my-model", display_name="ModelStep", description="description for Model step", model=model, inputs=model_inputs, ) # Condition step for evaluating model quality and branching execution cond_lte = ConditionGreaterThanOrEqualTo( left=step_train.properties.HyperParameters["batch-size"], right=6.0, ) step_cond = ConditionStep( name="CustomerChurnAccuracyCond", conditions=[cond_lte], if_steps=[], else_steps=[step_model], ) pipeline = Pipeline( name=pipeline_name, parameters=[instance_type, instance_count, output_prefix], steps=[step_process, step_train, step_cond], sagemaker_session=sagemaker_session, ) definition = json.loads(pipeline.definition()) assert definition["Version"] == "2020-12-01" steps = definition["Steps"] assert len(steps) == 3 training_args = {} condition_args = {} for step in steps: if step["Type"] == "Training": training_args = step["Arguments"] if step["Type"] == "Condition": condition_args = step["Arguments"] assert training_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] == { "Get": "Steps.my-process.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri" } assert condition_args["Conditions"][0]["LeftValue"] == { "Get": "Steps.my-train.HyperParameters['batch-size']" } try: response = pipeline.create(role) create_arn = response["PipelineArn"] assert re.match( 
rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn, ) finally: try: pipeline.delete() except Exception: pass
def runProcessing(
    self,
    entrypoint=None,
    command=None,
    env=None,
    code=None,
    arguments=None,
    inputs=None,
    outputs=None,
    instance_type=constants.DEFAULT_INSTANCE_TYPE_TRAINING,
    instance_count=constants.DEFAULT_INSTANCE_COUNT,
    role_name=constants.DEFAULT_IAM_ROLE,
    volume_size=constants.DEFAULT_VOLUME_SIZE,
    max_run_mins=constants.DEFAULT_MAX_RUN,
    tags=None,
    input_distribution="FullyReplicated",
    dependencies=None,
):
    # Avoid mutable default arguments: copy the caller's collections (or start
    # empty) so repeated calls don't accumulate entries in shared defaults, and
    # so a missing `env` doesn't raise when keys are added below.
    inputs = list(inputs) if inputs else []
    outputs = list(outputs) if outputs else []
    tags = dict(tags) if tags else {}
    dependencies = list(dependencies) if dependencies else []
    env = dict(env) if env else {}

    logger.info(
        f"===== Running a processing job {self.task_name} entrypoint={entrypoint} "
        f"command={command} code={code} arguments={arguments}... ====="
    )
    job_name = self._getJobName()

    # ## Outputs
    # state - continuously updated
    state_path = "/opt/ml/processing/state"
    outputs.append(ProcessingOutput(state_path, self.stateS3Uri, "state", "Continuous"))
    env["SSM_STATE"] = state_path
    # output - copied at the end of the job
    output_path = "/opt/ml/processing/output"
    output_s3_uri = sagemaker.s3.s3_path_join(self.baseTaskS3Uri, job_name, "output")
    outputs.append(ProcessingOutput(output_path, output_s3_uri, "output", "EndOfJob"))
    env["SSM_OUTPUT"] = output_path

    # ## Inputs
    # previous state
    bucket, prefix = sagemaker.s3.parse_s3_url(self.stateS3Uri)
    if self.smSession.list_s3_files(bucket, prefix):
        prev_state_path = "/opt/ml/processing/state_prev"
        inputs.append(
            ProcessingInput(
                self.stateS3Uri,
                prev_state_path,
                "state_prev",
                s3_data_distribution_type="FullyReplicated",
            )
        )

    # dependencies - append the internal dependencies
    dependencies.extend(self.internalDependencies)
    for dep in dependencies:
        dep = os.path.abspath(dep)
        basename = os.path.basename(dep)
        local_path = f"/opt/ml/processing/input/code/{basename}"
        inputs.append(
            ProcessingInput(
                dep,
                local_path,
                "DEP_" + basename,
                s3_data_distribution_type="FullyReplicated",
            )
        )

    # input data
    if self.inputS3Uri:
        data_path = "/opt/ml/processing/data"
        inputs.append(
            ProcessingInput(
                self.inputS3Uri,
                data_path,
                "data",
                s3_data_distribution_type=input_distribution,
            )
        )
        env["SM_CHANNEL_DATA"] = data_path

    tags["SimpleSagemakerTask"] = self.task_name
    tags["SimpleSagemakerVersion"] = VERSION
    tags = [{"Key": k, "Value": v} for k, v in tags.items()]

    additional_args = dict()
    if code:
        processor_class = ScriptProcessor
        additional_args["command"] = command
    else:
        assert (
            not command
        ), "Command can't be given when code isn't given (for the `Processor` class)"
        processor_class = Processor
        additional_args["entrypoint"] = entrypoint

    processor = processor_class(
        role=role_name,
        image_uri=self.image_uri,
        instance_count=instance_count,
        instance_type=instance_type,
        volume_size_in_gb=volume_size,
        max_runtime_in_seconds=max_run_mins * 60,
        sagemaker_session=self.smSession,
        tags=tags,
        env=env,
        **additional_args,
    )
    if code:
        processor.run(
            code=code,
            inputs=inputs,
            outputs=outputs,
            arguments=arguments,
            job_name=job_name,
        )
    else:
        processor.run(
            inputs=inputs,
            outputs=outputs,
            arguments=arguments,
            job_name=job_name,
        )

    processing_job_description = self.smSession.describe_processing_job(job_name)
    self.estimators.append(processor)
    self.jobNames.append(job_name)
    self.descriptions.append(processing_job_description)
    # if "Completed" != processing_job_description["ProcessingJobStatus"]:
    #     logger.error(
    #         f"Task failed with status: {processing_job_description['ProcessingJobStatus']}"
    #     )
    return job_name
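# A minimal usage sketch for runProcessing (not from the original source). It
# assumes `task` is an instance of the enclosing task class, already configured
# with an image URI, state/input S3 URIs, and a SageMaker session; the script
# name and arguments below are hypothetical.
job_name = task.runProcessing(
    code="./process_data.py",
    command=["python3"],
    arguments=["--mode", "batch"],
    env={"MY_FLAG": "1"},
    instance_type="ml.m5.xlarge",
    max_run_mins=60,
)
print(f"Processing job finished: {job_name}")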
def define_inference_pipeline(
    sm_role,
    workflow_execution_role,
    inference_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_inference_pipeline.yaml",
):
    """Define the inference pipeline, which consists of multiple AWS Step Functions steps.

    sm_role: ARN of the SageMaker execution role
    workflow_execution_role: ARN of the Step Functions execution role
    inference_pipeline_name: name of the Step Functions workflow
    return_yaml, dump_yaml_file: currently unused; the function always returns an
        instance of `stepfunctions.workflow.Workflow` and does not write a YAML file
    """
    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "InputDataURL": str,
            "PreprocessingJobName": str,
            "InferenceJobName": str,
            "ProcModelS3": str,
            "PreprocessingCodeURL": str,
            "InferenceCodeURL": str,
            "ModelS3": str,
            "PreprocessedTrainDataURL": str,
            "PreprocessedTestDataURL": str,
            "OutputPathURL": str,
        }
    )

    # Pre-processing step: applies the preprocessing model artifact (ProcModelS3)
    # and preprocessing code to the raw input data.
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    # ProcessingInputs and ProcessingOutputs for the SageMaker Processing job.
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ProcModelS3"],
            destination="/opt/ml/processing/model",
            input_name="proc_model",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["PreprocessedTestDataURL"],
            output_name="test_data",
        ),
    ]
    processing_step = ProcessingStep(
        "SageMaker pre-processing step",
        processor=sklearn_processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--mode", "infer"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
    )

    # Inference step: inputs are the preprocessed data S3 URL, the inference code
    # S3 URL, and the model S3 URL; output is the inferred data.
    sklearn_processor2 = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )
    inputs = [
        ProcessingInput(
            source=execution_input["PreprocessedTestDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["InferenceCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ModelS3"],
            destination="/opt/ml/processing/model",
            input_name="model",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["OutputPathURL"],
            output_name="test_data",
        ),
    ]
    inference_step = ProcessingStep(
        "SageMaker inference step",
        processor=sklearn_processor2,
        job_name=execution_input["InferenceJobName"],
        inputs=inputs,
        outputs=outputs,
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/inference.py",
        ],
    )

    # Fail state to mark the workflow failed if any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed"
    )

    # Add error handling to the workflow.
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)
    inference_step.add_catch(catch_state_processing)

    # Create the workflow.
    workflow_graph = Chain([processing_step, inference_step])
    inference_pipeline = Workflow(
        name=inference_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return inference_pipeline
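# A hedged usage sketch (not part of the original code): create the Step Functions
# workflow returned above and start an execution with concrete values for the
# ExecutionInput placeholders. All ARNs, names, and S3 URIs below are hypothetical.
pipeline = define_inference_pipeline(
    sm_role="arn:aws:iam::111111111111:role/sagemaker-execution-role",
    workflow_execution_role="arn:aws:iam::111111111111:role/stepfunctions-execution-role",
    inference_pipeline_name="my-inference-pipeline",
)
pipeline.create()
execution = pipeline.execute(
    inputs={
        "InputDataURL": "s3://my-bucket/raw/input.csv",
        "PreprocessingJobName": "preprocess-job-001",
        "InferenceJobName": "inference-job-001",
        "ProcModelS3": "s3://my-bucket/models/proc_model.tar.gz",
        "PreprocessingCodeURL": "s3://my-bucket/code/preprocessing.py",
        "InferenceCodeURL": "s3://my-bucket/code/inference.py",
        "ModelS3": "s3://my-bucket/models/model.tar.gz",
        "PreprocessedTrainDataURL": "s3://my-bucket/preprocessed/train",
        "PreprocessedTestDataURL": "s3://my-bucket/preprocessed/test",
        "OutputPathURL": "s3://my-bucket/inference-output",
    }
)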
def test_local_processing_script_processor(sagemaker_local_session, sklearn_image_uri): input_file_path = os.path.join(DATA_DIR, "dummy_input.txt") script_processor = ScriptProcessor( role="SageMakerRole", image_uri=sklearn_image_uri, command=["python3"], instance_count=1, instance_type="local", volume_size_in_gb=30, volume_kms_key=None, max_runtime_in_seconds=3600, base_job_name="test-script-processor", env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"}, tags=[{ "Key": "dummy-tag", "Value": "dummy-tag-value" }], sagemaker_session=sagemaker_local_session, ) script_processor.run( code=os.path.join(DATA_DIR, "dummy_script.py"), inputs=[ ProcessingInput( source=input_file_path, destination="/opt/ml/processing/input/container/path/", input_name="dummy_input", s3_data_type="S3Prefix", s3_input_mode="File", s3_data_distribution_type="FullyReplicated", s3_compression_type="None", ) ], outputs=[ ProcessingOutput( source="/opt/ml/processing/output/container/path/", output_name="dummy_output", s3_upload_mode="EndOfJob", ) ], arguments=["-v"], wait=True, logs=True, ) job_description = script_processor.latest_job.describe() assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input" assert job_description["ProcessingInputs"][1]["InputName"] == "code" assert job_description["ProcessingJobName"].startswith( "test-script-processor") assert job_description["ProcessingJobStatus"] == "Completed" assert job_description["ProcessingOutputConfig"]["Outputs"][0][ "OutputName"] == "dummy_output" assert job_description["ProcessingResources"]["ClusterConfig"][ "InstanceCount"] == 1 assert job_description["ProcessingResources"]["ClusterConfig"][ "InstanceType"] == "local" assert job_description["ProcessingResources"]["ClusterConfig"][ "VolumeSizeInGB"] == 30 assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"] assert job_description["AppSpecification"]["ContainerEntrypoint"] == [ "python3", "/opt/ml/processing/input/code/dummy_script.py", ] assert job_description["AppSpecification"]["ImageUri"] == sklearn_image_uri assert job_description["Environment"] == { "DUMMY_ENVIRONMENT_VARIABLE": "dummy-value" }
def getter(self, attr: str) -> Dict[str, Any]: data = { 'tfrecord_processing': { 'endpoint': ['python3', 'criteo_ads_data/run_processing.py'], 'inputs': [ ProcessingInput( source='s3://criteo-ads-data/prod/train_csv', destination='/opt/ml/processing/input', s3_data_distribution_type='ShardedByS3Key', ) ], 'outputs': [ ProcessingOutput( source='/opt/ml/processing/output', destination= 's3://criteo-ads-data/prod/train_tfrecord_gz', ) ], 'arguments': [ '--input_path=/opt/ml/processing/input', '--output_path=/opt/ml/processing/output', ], 'sm_config': SagemakerProcessingConfig( project_name=self.project_name, env=self.env, region_name=self.region_name, current_time=self.current_time, sm_instance_type='ml.c5.2xlarge', sm_instance_count=20, sm_volumesize=100, max_run=1 * 60 * 60, ) }, 'layer_processing': { 'endpoint': ['python3', 'criteo_ads_data/run_processing_layer.py'], 'inputs': [ ProcessingInput( source= 's3://criteo-ads-data/prod/train_tfrecord_gz/train', destination='/opt/ml/processing/input', s3_data_distribution_type='FullyReplicated', ) ], 'outputs': [ ProcessingOutput( source='/opt/ml/processing/output', destination='s3://criteo-ads-data/prod/proc_layer', ) ], 'arguments': [ '--input_path=/opt/ml/processing/input', '--output_path=/opt/ml/processing/output', ], 'sm_config': SagemakerProcessingConfig( project_name=self.project_name, env=self.env, region_name=self.region_name, current_time=self.current_time, sm_instance_type='ml.c5.9xlarge', sm_instance_count=1, sm_volumesize=100, max_run=24 * 60 * 60, ) }, 'estimator': { 'sm_input': { 'train': TrainingInput( s3_data= 's3://criteo-ads-data/prod/train_tfrecord_100000_gz/train', distribution='FullyReplicated', ), 'test': TrainingInput( s3_data= 's3://criteo-ads-data/prod/train_tfrecord_100000_gz/test', distribution='FullyReplicated', ), 'layer': TrainingInput( s3_data='s3://criteo-ads-data/prod/proc_layer_100000', distribution='FullyReplicated', ), }, 'shared_hyperparameters': { 'tf_logs_path': self.tf_logs_path, 'batch_size': 512, }, 'sm_config': SagemakerTrainingConfig( project_name=self.project_name, env=self.env, region_name=self.region_name, current_time=self.current_time, sm_instance_type='ml.c5.2xlarge', sm_instance_count=1, sm_volumesize=300, max_run=1 * 24 * 60 * 60, ) }, 'hparam_tuning': { 'objective_metric_name': 'validation:loss', 'metric_definitions': [ { 'Name': 'train:loss', 'Regex': '.*loss: ([0-9\\.]+) - auc: [0-9\\.]+.*' }, { 'Name': 'train:auc', 'Regex': '.*loss: [0-9\\.]+ - auc: ([0-9\\.]+).*' }, { 'Name': 'validation:loss', 'Regex': '.*step - loss: [0-9\\.]+ - auc: [0-9\\.]+ - val_loss: ([0-9\\.]+) - val_auc: [0-9\\.]+.*' }, { 'Name': 'validation:auc', 'Regex': '.*step - loss: [0-9\\.]+ - auc: [0-9\\.]+ - val_loss: [0-9\\.]+ - val_auc: ([0-9\\.]+).*' }, ], 'hyperparameter_ranges': { 'epochs': IntegerParameter(1, 50), 'batch_size': CategoricalParameter([64, 128, 256, 512]) }, 'objective_type': 'Minimize', 'max_jobs': 5, 'max_parallel_jobs': 5, }, } return data.get(attr)
def run_model_monitor_job_processor( region, instance_type, role, data_capture_path, statistics_path, constraints_path, reports_path, instance_count=1, preprocessor_path=None, postprocessor_path=None, publish_cloudwatch_metrics="Disabled", ): data_capture_sub_path = data_capture_path[data_capture_path. rfind("datacapture/"):] data_capture_sub_path = data_capture_sub_path[data_capture_sub_path. find("/") + 1:] processing_output_paths = reports_path + "/" + data_capture_sub_path input_1 = ProcessingInput( input_name="input_1", source=data_capture_path, destination="/opt/ml/processing/input/endpoint/" + data_capture_sub_path, s3_data_type="S3Prefix", s3_input_mode="File", ) baseline = ProcessingInput( input_name="baseline", source=statistics_path, destination="/opt/ml/processing/baseline/stats", s3_data_type="S3Prefix", s3_input_mode="File", ) constraints = ProcessingInput( input_name="constraints", source=constraints_path, destination="/opt/ml/processing/baseline/constraints", s3_data_type="S3Prefix", s3_input_mode="File", ) outputs = ProcessingOutput( output_name="result", source="/opt/ml/processing/output", destination=processing_output_paths, s3_upload_mode="Continuous", ) env = { "baseline_constraints": "/opt/ml/processing/baseline/constraints/" + get_file_name(constraints_path), "baseline_statistics": "/opt/ml/processing/baseline/stats/" + get_file_name(statistics_path), "dataset_format": '{"sagemakerCaptureJson":{"captureIndexNames":["endpointInput","endpointOutput"]}}', "dataset_source": "/opt/ml/processing/input/endpoint", "output_path": "/opt/ml/processing/output", "publish_cloudwatch_metrics": publish_cloudwatch_metrics, } inputs = [input_1, baseline, constraints] if postprocessor_path: env["post_analytics_processor_script"] = "/opt/ml/processing/code/postprocessing/" + get_file_name( postprocessor_path) post_processor_script = ProcessingInput( input_name="post_processor_script", source=postprocessor_path, destination="/opt/ml/processing/code/postprocessing", s3_data_type="S3Prefix", s3_input_mode="File", ) inputs.append(post_processor_script) if preprocessor_path: env["record_preprocessor_script"] = "/opt/ml/processing/code/preprocessing/" + get_file_name( preprocessor_path) pre_processor_script = ProcessingInput( input_name="pre_processor_script", source=preprocessor_path, destination="/opt/ml/processing/code/preprocessing", s3_data_type="S3Prefix", s3_input_mode="File", ) inputs.append(pre_processor_script) processor = Processor( image_uri=get_model_monitor_container_uri(region), instance_count=instance_count, instance_type=instance_type, role=role, env=env, ) return processor.run(inputs=inputs, outputs=[outputs])
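# A hedged usage sketch for run_model_monitor_job_processor (not from the original
# source): the region, role ARN, and S3 paths are hypothetical placeholders. Note
# that data_capture_path is expected to contain a "datacapture/" segment.
run_model_monitor_job_processor(
    region="us-west-2",
    instance_type="ml.m5.xlarge",
    role="arn:aws:iam::111111111111:role/sagemaker-execution-role",
    data_capture_path="s3://my-bucket/datacapture/my-endpoint/AllTraffic",
    statistics_path="s3://my-bucket/baseline/statistics.json",
    constraints_path="s3://my-bucket/baseline/constraints.json",
    reports_path="s3://my-bucket/monitoring-reports",
    publish_cloudwatch_metrics="Disabled",
)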
def get_pipeline( region, role=None, default_bucket=None, model_package_group_name="sagemaker-group-insurance", pipeline_name="sagemaker-pipeline-insurance", base_job_prefix="sagemaker-featurestore-insurance", ): """Gets a SageMaker ML Pipeline instance working with on WIP data. Args: region: AWS region to create and run the pipeline. role: IAM role to create and run steps and pipeline. default_bucket: the bucket to use for storing the artifacts Returns: an instance of a pipeline """ sagemaker_session = get_session(region, default_bucket) if role is None: role = sagemaker.session.get_execution_role(sagemaker_session) # parameters for pipeline execution processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1) processing_instance_type = ParameterString( name="ProcessingInstanceType", default_value="ml.m5.xlarge" ) training_instance_type = ParameterString( name="TrainingInstanceType", default_value="ml.m5.xlarge" ) model_approval_status = ParameterString( name="ModelApprovalStatus", default_value="Approved" ) # processing step for feature engineering sklearn_processor = SKLearnProcessor( framework_version="0.23-1", instance_type=processing_instance_type, instance_count=processing_instance_count, base_job_name=f"{base_job_prefix}/sklearn-insurance-preprocess", sagemaker_session=sagemaker_session, role=role, ) step_process = ProcessingStep( name="PreprocessInsuranceData", processor=sklearn_processor, outputs=[ ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"), ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), ], code=os.path.join(BASE_DIR, "preprocess.py"), job_arguments=["--input_dataset_1", "41214", "--input_dataset_2", "41215",], ) ''' # feature store step feature_path = 's3://' + default_bucket+'/'+base_job_prefix + '/features' image_uri = sagemaker.image_uris.retrieve( framework="xgboost", region=region, version="1.0-1", py_version="py3", instance_type=training_instance_type, ) feature_processor = ScriptProcessor( image_uri=image_uri, command=["python3"], instance_type=processing_instance_type, instance_count=1, base_job_name=f"{base_job_prefix}/script-insurance-feature-store", sagemaker_session=sagemaker_session, role=role, ) step_feature = ProcessingStep( name="FeatureStoreInsuranceData", processor=feature_processor, outputs=[ ProcessingOutput(output_name="train", source="/opt/ml/processing/training_input"), ], code=os.path.join(BASE_DIR, "feature_store.py"), job_arguments=["feature_s3_url", feature_path, "--feature_group_name", "sagemaker-featurestore-insurance"], ) ''' # training step for generating model artifacts model_path = 's3://' + default_bucket+'/'+base_job_prefix + '/features' image_uri = sagemaker.image_uris.retrieve( framework="xgboost", region=region, version="1.0-1", py_version="py3", instance_type=training_instance_type, ) xgb_train = Estimator( image_uri=image_uri, instance_type=training_instance_type, instance_count=1, output_path=model_path, base_job_name=f"{base_job_prefix}/insurance-train", sagemaker_session=sagemaker_session, role=role, ) xgb_train.set_hyperparameters(objective = "reg:tweedie", num_round = 50) step_train = TrainingStep( name="TrainAbaloneModel", estimator=xgb_train, inputs={ "train": TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ "train" ].S3Output.S3Uri, content_type="text/csv", ), "validation": TrainingInput( 
s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ "validation" ].S3Output.S3Uri, content_type="text/csv", ), }, ) # processing step for evaluation script_eval = ScriptProcessor( image_uri=image_uri, command=["python3"], instance_type=processing_instance_type, instance_count=1, base_job_name=f"{base_job_prefix}/script-wip-eval", sagemaker_session=sagemaker_session, role=role, ) evaluation_report = PropertyFile( name="WipEvaluationReport", output_name="evaluation", path="evaluation.json", ) step_eval = ProcessingStep( name="EvaluateWipModel", processor=script_eval, inputs=[ ProcessingInput( source=step_train.properties.ModelArtifacts.S3ModelArtifacts, destination="/opt/ml/processing/model", ), ProcessingInput( source=step_process.properties.ProcessingOutputConfig.Outputs[ "test" ].S3Output.S3Uri, destination="/opt/ml/processing/test", ), ], outputs=[ ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"), ], code=os.path.join(BASE_DIR, "evaluate.py"), property_files=[evaluation_report], ) # register model step that will be conditionally executed model_metrics = ModelMetrics( model_statistics=MetricsSource( s3_uri="{}/evaluation.json".format( step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"] ), content_type="application/json" ) ) step_register = RegisterModel( name="register-insurance-model", estimator=xgb_train, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, content_types=["text/csv"], response_types=["text/csv"], inference_instances=["ml.t2.medium", "ml.m5.large"], transform_instances=["ml.m5.large"], model_package_group_name=model_package_group_name, approval_status=model_approval_status, model_metrics=model_metrics, ) # condition step for evaluating model quality and branching execution cond_lte = ConditionLessThanOrEqualTo( left=JsonGet( step=step_eval, property_file=evaluation_report, json_path="regression_metrics.mse.value" ), right=6.0, ) step_cond = ConditionStep( name="CheckMSEWipEvaluation", conditions=[cond_lte], if_steps=[], else_steps=[step_register], ) pipeline = Pipeline( name=pipeline_name, parameters=[ processing_instance_type, processing_instance_count, training_instance_type, model_approval_status, ], steps=[step_process, step_train, step_eval, step_cond], sagemaker_session=sagemaker_session, ) return pipeline
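# A hedged usage sketch for get_pipeline (not part of the original source): the
# region, role ARN, and bucket are hypothetical. upsert() creates or updates the
# pipeline definition before an execution is started.
pipeline = get_pipeline(
    region="us-east-1",
    role="arn:aws:iam::111111111111:role/sagemaker-execution-role",
    default_bucket="my-sagemaker-bucket",
)
pipeline.upsert(role_arn="arn:aws:iam::111111111111:role/sagemaker-execution-role")
execution = pipeline.start()
execution.wait()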
def _run( self, data_config, analysis_config, wait, logs, job_name, kms_key, experiment_config, ): """Runs a ProcessingJob with the Sagemaker Clarify container and an analysis config. Args: data_config (:class:`~sagemaker.clarify.DataConfig`): Config of the input/output data. analysis_config (dict): Config following the analysis_config.json format. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when ``wait`` is True (default: True). job_name (str): Processing job name. kms_key (str): The ARN of the KMS key that is used to encrypt the user code file (default: None). experiment_config (dict[str, str]): Experiment management configuration. Dictionary contains three optional keys: 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'. """ analysis_config["methods"]["report"] = { "name": "report", "title": "Analysis Report" } with tempfile.TemporaryDirectory() as tmpdirname: analysis_config_file = os.path.join(tmpdirname, "analysis_config.json") with open(analysis_config_file, "w") as f: json.dump(analysis_config, f) s3_analysis_config_file = _upload_analysis_config( analysis_config_file, data_config.s3_output_path, self.sagemaker_session, kms_key, ) config_input = ProcessingInput( input_name="analysis_config", source=s3_analysis_config_file, destination=self._CLARIFY_CONFIG_INPUT, s3_data_type="S3Prefix", s3_input_mode="File", s3_compression_type="None", ) data_input = ProcessingInput( input_name="dataset", source=data_config.s3_data_input_path, destination=self._CLARIFY_DATA_INPUT, s3_data_type="S3Prefix", s3_input_mode="File", s3_data_distribution_type=data_config. s3_data_distribution_type, s3_compression_type=data_config.s3_compression_type, ) result_output = ProcessingOutput( source=self._CLARIFY_OUTPUT, destination=data_config.s3_output_path, output_name="analysis_result", s3_upload_mode="EndOfJob", ) super().run( inputs=[data_input, config_input], outputs=[result_output], wait=wait, logs=logs, job_name=job_name, kms_key=kms_key, experiment_config=experiment_config, )
def test_tuning_multi_algos( sagemaker_session, role, cpu_instance_type, pipeline_name, region_name, script_dir, athena_dataset_definition, ): base_dir = os.path.join(DATA_DIR, "pytorch_mnist") entry_point = os.path.join(base_dir, "mnist.py") input_path = sagemaker_session.upload_data( path=os.path.join(base_dir, "training"), key_prefix="integ-test-data/pytorch_mnist/training", ) instance_count = ParameterInteger(name="InstanceCount", default_value=1) instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge") input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv" sklearn_processor = SKLearnProcessor( framework_version="0.20.0", instance_type=instance_type, instance_count=instance_count, base_job_name="test-sklearn", sagemaker_session=sagemaker_session, role=role, ) property_file = PropertyFile(name="DataAttributes", output_name="attributes", path="attributes.json") step_process = ProcessingStep( name="my-process", display_name="ProcessingStep", description="description for Processing step", processor=sklearn_processor, inputs=[ ProcessingInput(source=input_data, destination="/opt/ml/processing/input"), ProcessingInput(dataset_definition=athena_dataset_definition), ], outputs=[ ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"), ProcessingOutput(output_name="attributes", source="/opt/ml/processing/attributes.json"), ], property_files=[property_file], code=os.path.join(script_dir, "preprocessing.py"), ) static_hp_1 = ParameterString(name="InstanceType", default_value="ml.m5.xlarge") json_get_hp = JsonGet(step_name=step_process.name, property_file=property_file, json_path="train_size") pytorch_estimator = PyTorch( entry_point=entry_point, role=role, framework_version="1.5.0", py_version="py3", instance_count=instance_count, instance_type=instance_type, sagemaker_session=sagemaker_session, enable_sagemaker_metrics=True, max_retry_attempts=3, hyperparameters={ "static-hp": static_hp_1, "train_size": json_get_hp }, ) min_batch_size = ParameterString(name="MinBatchSize", default_value="64") max_batch_size = json_get_hp tuner = HyperparameterTuner.create( estimator_dict={ "estimator-1": pytorch_estimator, "estimator-2": pytorch_estimator, }, objective_metric_name_dict={ "estimator-1": "test:acc", "estimator-2": "test:acc", }, hyperparameter_ranges_dict={ "estimator-1": { "batch-size": IntegerParameter(min_batch_size, max_batch_size) }, "estimator-2": { "batch-size": IntegerParameter(min_batch_size, max_batch_size) }, }, metric_definitions_dict={ "estimator-1": [{ "Name": "test:acc", "Regex": "Overall test accuracy: (.*?);" }], "estimator-2": [{ "Name": "test:acc", "Regex": "Overall test accuracy: (.*?);" }], }, ) inputs = { "estimator-1": TrainingInput(s3_data=input_path), "estimator-2": TrainingInput(s3_data=input_path), } step_tune = TuningStep( name="my-tuning-step", tuner=tuner, inputs=inputs, ) pipeline = Pipeline( name=pipeline_name, parameters=[ instance_count, instance_type, min_batch_size, max_batch_size ], steps=[step_process, step_tune], sagemaker_session=sagemaker_session, ) try: response = pipeline.create(role) create_arn = response["PipelineArn"] assert re.match( rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn, ) execution = pipeline.start(parameters={}) assert re.match( rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/", execution.arn, ) finally: try: pipeline.delete() except Exception: pass
    instance_type='local',
    role=role)

print('Starting processing job.')
print(
    'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
)

processor.run(
    code='processing_script.py',
    inputs=[
        ProcessingInput(
            source='./dependencies/',
            destination='/opt/ml/processing/dependencies/'),
        ProcessingInput(
            source='./input_data/',
            destination='/opt/ml/processing/input_data/')
    ],
    outputs=[
        ProcessingOutput(
            output_name='tokenized_words_data',
            source='/opt/ml/processing/processed_data/')
    ],
    arguments=['job-type', 'word-tokenize'])

preprocessing_job_description = processor.jobs[-1].describe()
output_config = preprocessing_job_description['ProcessingOutputConfig']
print(output_config)

for output in output_config['Outputs']:
    if output['OutputName'] == 'tokenized_words_data':
        tokenized_words_data_file = output['S3Output']['S3Uri']

print('Output file is located on: {}'.format(tokenized_words_data_file))
def test_processor_with_all_parameters(sagemaker_session): processor = Processor( role=ROLE, image_uri=CUSTOM_IMAGE_URI, instance_count=1, instance_type="ml.m4.xlarge", sagemaker_session=sagemaker_session, entrypoint=[ "python3", "/opt/ml/processing/input/code/processing_code.py" ], volume_size_in_gb=100, volume_kms_key="arn:aws:kms:us-west-2:012345678901:key/volume-kms-key", output_kms_key="arn:aws:kms:us-west-2:012345678901:key/output-kms-key", max_runtime_in_seconds=3600, base_job_name="processor_base_name", env={"my_env_variable": "my_env_variable_value"}, tags=[{ "Key": "my-tag", "Value": "my-tag-value" }], network_config=NetworkConfig( subnets=["my_subnet_id"], security_group_ids=["my_security_group_id"], enable_network_isolation=True, encrypt_inter_container_traffic=True, ), ) processor.run( inputs=[ ProcessingInput( source="s3://path/to/my/dataset/census.csv", destination="/container/path/", input_name="my_dataset", s3_data_type="S3Prefix", s3_input_mode="File", s3_data_distribution_type="FullyReplicated", s3_compression_type="None", ) ], outputs=[ ProcessingOutput( source="/container/path/", destination="s3://uri/", output_name="my_output", s3_upload_mode="EndOfJob", ) ], arguments=["--drop-columns", "'SelfEmployed'"], wait=True, logs=False, job_name="my_job_name", experiment_config={"ExperimentName": "AnExperiment"}, ) expected_args = _get_expected_args_all_parameters( processor._current_job_name) # Drop the "code" input from expected values. expected_args["inputs"] = [expected_args["inputs"][0]] sagemaker_session.process.assert_called_with(**expected_args)
def test_sklearn_with_all_parameters(exists_mock, isfile_mock, botocore_resolver, sklearn_version, sagemaker_session): botocore_resolver.return_value.construct_endpoint.return_value = { "hostname": ECR_HOSTNAME } processor = SKLearnProcessor( role=ROLE, framework_version=sklearn_version, instance_type="ml.m4.xlarge", instance_count=1, volume_size_in_gb=100, volume_kms_key="arn:aws:kms:us-west-2:012345678901:key/volume-kms-key", output_kms_key="arn:aws:kms:us-west-2:012345678901:key/output-kms-key", max_runtime_in_seconds=3600, base_job_name="my_sklearn_processor", env={"my_env_variable": "my_env_variable_value"}, tags=[{ "Key": "my-tag", "Value": "my-tag-value" }], network_config=NetworkConfig( subnets=["my_subnet_id"], security_group_ids=["my_security_group_id"], enable_network_isolation=True, encrypt_inter_container_traffic=True, ), sagemaker_session=sagemaker_session, ) processor.run( code="/local/path/to/processing_code.py", inputs=[ ProcessingInput( source="s3://path/to/my/dataset/census.csv", destination="/container/path/", input_name="my_dataset", s3_data_type="S3Prefix", s3_input_mode="File", s3_data_distribution_type="FullyReplicated", s3_compression_type="None", ) ], outputs=[ ProcessingOutput( source="/container/path/", destination="s3://uri/", output_name="my_output", s3_upload_mode="EndOfJob", ) ], arguments=["--drop-columns", "'SelfEmployed'"], wait=True, logs=False, job_name="my_job_name", experiment_config={"ExperimentName": "AnExperiment"}, ) expected_args = _get_expected_args_all_parameters( processor._current_job_name) sklearn_image_uri = ( "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-py3" ).format(sklearn_version) expected_args["app_specification"]["ImageUri"] = sklearn_image_uri sagemaker_session.process.assert_called_with(**expected_args)
def test_end_to_end_pipeline_successful_execution( sagemaker_session, region_name, role, pipeline_name, wait=False ): model_package_group_name = f"{pipeline_name}ModelPackageGroup" data_path = os.path.join(DATA_DIR, "workflow") default_bucket = sagemaker_session.default_bucket() # download the input data local_input_path = os.path.join(data_path, "abalone-dataset.csv") s3 = sagemaker_session.boto_session.resource("s3") s3.Bucket(f"sagemaker-servicecatalog-seedcode-{region_name}").download_file( "dataset/abalone-dataset.csv", local_input_path ) # # upload the input data to our bucket base_uri = f"s3://{default_bucket}/{pipeline_name}" with open(local_input_path) as data: body = data.read() input_data_uri = S3Uploader.upload_string_as_file_body( body=body, desired_s3_uri=f"{base_uri}/abalone-dataset.csv", sagemaker_session=sagemaker_session, ) # download batch transform data local_batch_path = os.path.join(data_path, "abalone-dataset-batch") s3.Bucket(f"sagemaker-servicecatalog-seedcode-{region_name}").download_file( "dataset/abalone-dataset-batch", local_batch_path ) # upload the batch transform data with open(local_batch_path) as data: body = data.read() batch_data_uri = S3Uploader.upload_string_as_file_body( body=body, desired_s3_uri=f"{base_uri}/abalone-dataset-batch", sagemaker_session=sagemaker_session, ) # define parameters processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1) processing_instance_type = ParameterString( name="ProcessingInstanceType", default_value="ml.m5.xlarge" ) training_instance_type = ParameterString( name="TrainingInstanceType", default_value="ml.m5.xlarge" ) model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="Approved") input_data = ParameterString( name="InputData", default_value=input_data_uri, ) batch_data = ParameterString( name="BatchData", default_value=batch_data_uri, ) # define processing step framework_version = "0.23-1" sklearn_processor = SKLearnProcessor( framework_version=framework_version, instance_type=processing_instance_type, instance_count=processing_instance_count, base_job_name=f"{pipeline_name}-process", role=role, sagemaker_session=sagemaker_session, ) step_process = ProcessingStep( name="AbaloneProcess", processor=sklearn_processor, inputs=[ ProcessingInput(source=input_data, destination="/opt/ml/processing/input"), ], outputs=[ ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"), ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), ], code=os.path.join(data_path, "abalone/preprocessing.py"), ) # define training step model_path = f"s3://{default_bucket}/{pipeline_name}Train" image_uri = image_uris.retrieve( framework="xgboost", region=region_name, version="1.0-1", py_version="py3", instance_type=training_instance_type, ) xgb_train = Estimator( image_uri=image_uri, instance_type=training_instance_type, instance_count=1, output_path=model_path, role=role, sagemaker_session=sagemaker_session, ) xgb_train.set_hyperparameters( objective="reg:linear", num_round=50, max_depth=5, eta=0.2, gamma=4, min_child_weight=6, subsample=0.7, silent=0, ) step_train = TrainingStep( name="AbaloneTrain", estimator=xgb_train, inputs={ "train": TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ "train" ].S3Output.S3Uri, content_type="text/csv", ), "validation": TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ 
"validation" ].S3Output.S3Uri, content_type="text/csv", ), }, ) # define evaluation step script_eval = ScriptProcessor( image_uri=image_uri, command=["python3"], instance_type=processing_instance_type, instance_count=1, base_job_name=f"{pipeline_name}-eval", role=role, sagemaker_session=sagemaker_session, ) evaluation_report = PropertyFile( name="EvaluationReport", output_name="evaluation", path="evaluation.json" ) step_eval = ProcessingStep( name="AbaloneEval", processor=script_eval, inputs=[ ProcessingInput( source=step_train.properties.ModelArtifacts.S3ModelArtifacts, destination="/opt/ml/processing/model", ), ProcessingInput( source=step_process.properties.ProcessingOutputConfig.Outputs[ "test" ].S3Output.S3Uri, destination="/opt/ml/processing/test", ), ], outputs=[ ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"), ], code=os.path.join(data_path, "abalone/evaluation.py"), property_files=[evaluation_report], ) # define create model step model = Model( image_uri=image_uri, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, sagemaker_session=sagemaker_session, role=role, ) inputs = CreateModelInput( instance_type="ml.m5.large", accelerator_type="ml.eia1.medium", ) step_create_model = CreateModelStep( name="AbaloneCreateModel", model=model, inputs=inputs, ) # define transform step transformer = Transformer( model_name=step_create_model.properties.ModelName, instance_type="ml.m5.xlarge", instance_count=1, output_path=f"s3://{default_bucket}/{pipeline_name}Transform", sagemaker_session=sagemaker_session, ) step_transform = TransformStep( name="AbaloneTransform", transformer=transformer, inputs=TransformInput(data=batch_data), ) # define register model step model_metrics = ModelMetrics( model_statistics=MetricsSource( s3_uri="{}/evaluation.json".format( step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"] ), content_type="application/json", ) ) step_register = RegisterModel( name="AbaloneRegisterModel", estimator=xgb_train, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, content_types=["text/csv"], response_types=["text/csv"], inference_instances=["ml.t2.medium", "ml.m5.xlarge"], transform_instances=["ml.m5.xlarge"], model_package_group_name=model_package_group_name, approval_status=model_approval_status, model_metrics=model_metrics, ) # define condition step cond_lte = ConditionLessThanOrEqualTo( left=JsonGet( step_name=step_eval.name, property_file=evaluation_report, json_path="regression_metrics.mse.value", ), right=20.0, ) step_cond = ConditionStep( name="AbaloneMSECond", conditions=[cond_lte], if_steps=[step_register, step_create_model, step_transform], else_steps=[], ) # define pipeline pipeline = Pipeline( name=pipeline_name, parameters=[ processing_instance_type, processing_instance_count, training_instance_type, model_approval_status, input_data, batch_data, ], steps=[step_process, step_train, step_eval, step_cond], sagemaker_session=sagemaker_session, ) pipeline.create(role) execution = pipeline.start() execution_arn = execution.arn if wait: execution.wait() return execution_arn
    ProcessingInput(
        source=input_data,
        destination="/opt/ml/processing/input",
        input_name="input-1",
    ),
    ProcessingInput(
        source=input_code,
        destination="/opt/ml/processing/input/code",
        input_name="code",
    ),
]
outputs = [
    ProcessingOutput(
        source="/opt/ml/processing/train",
        destination="{}/{}".format(output_data, "train_data"),
        output_name="train_data",
    ),
    ProcessingOutput(
        source="/opt/ml/processing/test",
        destination="{}/{}".format(output_data, "test_data"),
        output_name="test_data",
    ),
]

processor = SKLearnProcessor(
    framework_version="0.20.0",
    role=role.role_arn,
    instance_type="ml.m5.xlarge",
    instance_count=1,
)
def test_workflow_with_clarify( data_config, data_bias_config, model_config, model_predicted_label_config, pipeline_name, role, sagemaker_session, ): instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge") instance_count = ParameterInteger(name="InstanceCount", default_value=1) analysis_config = data_config.get_config() analysis_config.update(data_bias_config.get_config()) ( probability_threshold, predictor_config, ) = model_predicted_label_config.get_predictor_config() predictor_config.update(model_config.get_predictor_config()) analysis_config["methods"] = {"post_training_bias": {"methods": "all"}} analysis_config["predictor"] = predictor_config analysis_config["probability_threshold"] = probability_threshold analysis_config["methods"]["report"] = { "name": "report", "title": "Analysis Report" } with tempfile.TemporaryDirectory() as tmpdirname: analysis_config_file = os.path.join(tmpdirname, "analysis_config.json") with open(analysis_config_file, "w") as f: json.dump(analysis_config, f) config_input = ProcessingInput( input_name="analysis_config", source=analysis_config_file, destination="/opt/ml/processing/input/config", s3_data_type="S3Prefix", s3_input_mode="File", s3_compression_type="None", ) data_input = ProcessingInput( input_name="dataset", source=data_config.s3_data_input_path, destination="/opt/ml/processing/input/data", s3_data_type="S3Prefix", s3_input_mode="File", s3_data_distribution_type=data_config.s3_data_distribution_type, s3_compression_type=data_config.s3_compression_type, ) result_output = ProcessingOutput( source="/opt/ml/processing/output", destination=data_config.s3_output_path, output_name="analysis_result", s3_upload_mode="EndOfJob", ) processor = SageMakerClarifyProcessor( role="SageMakerRole", instance_count=instance_count, instance_type=instance_type, sagemaker_session=sagemaker_session, ) property_file = PropertyFile( name="BiasOutput", output_name="analysis_result", path="analysis.json", ) step_process = ProcessingStep( name="my-process", processor=processor, inputs=[data_input, config_input], outputs=[result_output], property_files=[property_file], ) cond_left = JsonGet( step=step_process, property_file="BiasOutput", json_path= "post_training_bias_metrics.facets.F1[0].metrics[0].value", ) step_condition = ConditionStep( name="bias-condition", conditions=[ConditionLessThanOrEqualTo(left=cond_left, right=1)], if_steps=[], else_steps=[], ) pipeline = Pipeline( name=pipeline_name, parameters=[instance_type, instance_count], steps=[step_process, step_condition], sagemaker_session=sagemaker_session, ) try: response = pipeline.create(role) create_arn = response["PipelineArn"] execution = pipeline.start(parameters={}) response = execution.describe() assert response["PipelineArn"] == create_arn try: execution.wait(delay=30, max_attempts=60) except WaiterError: pass execution_steps = execution.list_steps() assert len(execution_steps) == 2 assert execution_steps[1]["StepName"] == "my-process" assert execution_steps[1]["StepStatus"] == "Succeeded" assert execution_steps[0]["StepName"] == "bias-condition" assert execution_steps[0]["StepStatus"] == "Succeeded" assert execution_steps[0]["Metadata"]["Condition"][ "Outcome"] == "True" finally: try: pipeline.delete() except Exception: pass
def run_model_monitor_job_processor(region, instance_type, role, data_capture_path,
                                    preprocessor_path, postprocessor_path,
                                    statistics_path, constraints_path, reports_path):

    data_capture_sub_path = data_capture_path[data_capture_path.rfind('datacapture/'):]
    data_capture_sub_path = data_capture_sub_path[data_capture_sub_path.find('/') + 1:]
    processing_output_paths = reports_path + '/' + data_capture_sub_path

    input_1 = ProcessingInput(
        input_name='input_1',
        source=data_capture_path,
        destination='/opt/ml/processing/input/endpoint/' + data_capture_sub_path,
        s3_data_type='S3Prefix',
        s3_input_mode='File')

    baseline = ProcessingInput(
        input_name='baseline',
        source=statistics_path,
        destination='/opt/ml/processing/baseline/stats',
        s3_data_type='S3Prefix',
        s3_input_mode='File')

    constraints = ProcessingInput(
        input_name='constraints',
        source=constraints_path,
        destination='/opt/ml/processing/baseline/constraints',
        s3_data_type='S3Prefix',
        s3_input_mode='File')

    post_processor_script = ProcessingInput(
        input_name='post_processor_script',
        source=postprocessor_path,
        destination='/opt/ml/processing/code/postprocessing',
        s3_data_type='S3Prefix',
        s3_input_mode='File')

    pre_processor_script = ProcessingInput(
        input_name='pre_processor_script',
        source=preprocessor_path,
        destination='/opt/ml/processing/code/preprocessing',
        s3_data_type='S3Prefix',
        s3_input_mode='File')

    outputs = ProcessingOutput(
        output_name='result',
        source='/opt/ml/processing/output',
        destination=processing_output_paths,
        s3_upload_mode='Continuous')

    processor = Processor(
        image_uri=get_model_monitor_container_uri(region),
        instance_count=1,
        instance_type=instance_type,
        role=role,
        env={
            'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
            'baseline_statistics': '/opt/ml/processing/baseline/stats/statistics.json',
            'dataset_format': '{"sagemakerCaptureJson":{"captureIndexNames":["endpointInput","endpointOutput"]}}',
            'dataset_source': '/opt/ml/processing/input/endpoint',
            'output_path': '/opt/ml/processing/output',
            'post_analytics_processor_script': '/opt/ml/processing/code/postprocessing/postprocessor.py',
            'publish_cloudwatch_metrics': 'Disabled',
            'record_preprocessor_script': '/opt/ml/processing/code/preprocessing/preprocessor.py'
        })

    return processor.run(
        inputs=[input_1, baseline, constraints, post_processor_script, pre_processor_script],
        outputs=[outputs])
role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

processor = ScriptProcessor(
    command=['python3'],
    image_uri='sagemaker-delta-sharing-processing-local',
    role=role,
    instance_count=1,
    instance_type='local')

processor.run(
    code='processing_script.py',
    inputs=[
        ProcessingInput(
            source='./profile/',
            destination='/opt/ml/processing/profile/')
    ],
    outputs=[
        ProcessingOutput(
            output_name='delta_lake_processed_data',
            source='/opt/ml/processing/processed_data/')
    ])

preprocessing_job_description = processor.jobs[-1].describe()
output_config = preprocessing_job_description['ProcessingOutputConfig']
print(output_config)

for output in output_config['Outputs']:
    if output['OutputName'] == 'delta_lake_processed_data':
        delta_lake_processed_data_file = output['S3Output']['S3Uri']

bucket = delta_lake_processed_data_file.split("/")[2]
output_file_name = '/'.join(
    delta_lake_processed_data_file.split("/")[3:]) + "/total_cases_per_location.csv"
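# A hedged follow-up sketch (not from the original): download the processed CSV
# located by the snippet above, using the bucket and key it computed. Assumes
# boto3 credentials are configured; the local filename is arbitrary.
import boto3

s3_client = boto3.client('s3')
s3_client.download_file(bucket, output_file_name, 'total_cases_per_location.csv')
print('Downloaded s3://{}/{} to ./total_cases_per_location.csv'.format(bucket, output_file_name))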
def test_processing_step_with_placeholders(sklearn_processor_fixture, sagemaker_session, sfn_client, sfn_role_arn, sagemaker_role_arn): region = boto3.session.Session().region_name input_data = f"s3://sagemaker-sample-data-{region}/processing/census/census-income.csv" input_s3 = sagemaker_session.upload_data( path=os.path.join(DATA_DIR, 'sklearn_processing'), bucket=sagemaker_session.default_bucket(), key_prefix='integ-test-data/sklearn_processing/code') output_s3 = f"s3://{sagemaker_session.default_bucket()}/integ-test-data/sklearn_processing" inputs = [ ProcessingInput(source=input_data, destination='/opt/ml/processing/input', input_name='input-1'), ProcessingInput(source=input_s3 + '/preprocessor.py', destination='/opt/ml/processing/input/code', input_name='code'), ] outputs = [ ProcessingOutput(source='/opt/ml/processing/train', destination=output_s3 + '/train_data', output_name='train_data'), ProcessingOutput(source='/opt/ml/processing/test', destination=output_s3 + '/test_data', output_name='test_data'), ] # Build workflow definition execution_input = ExecutionInput( schema={ 'image_uri': str, 'instance_count': int, 'entrypoint': str, 'role': str, 'volume_size_in_gb': int, 'max_runtime_in_seconds': int, 'container_arguments': [str], }) parameters = { 'AppSpecification': { 'ContainerEntrypoint': execution_input['entrypoint'], 'ImageUri': execution_input['image_uri'] }, 'ProcessingResources': { 'ClusterConfig': { 'InstanceCount': execution_input['instance_count'], 'VolumeSizeInGB': execution_input['volume_size_in_gb'] } }, 'RoleArn': execution_input['role'], 'StoppingCondition': { 'MaxRuntimeInSeconds': execution_input['max_runtime_in_seconds'] } } job_name = generate_job_name() processing_step = ProcessingStep( 'create_processing_job_step', processor=sklearn_processor_fixture, job_name=job_name, inputs=inputs, outputs=outputs, container_arguments=execution_input['container_arguments'], container_entrypoint=execution_input['entrypoint'], parameters=parameters) processing_step.add_retry(SAGEMAKER_RETRY_STRATEGY) workflow_graph = Chain([processing_step]) with timeout(minutes=DEFAULT_TIMEOUT_MINUTES): workflow = create_workflow_and_check_definition( workflow_graph=workflow_graph, workflow_name=unique_name_from_base( "integ-test-processing-step-workflow"), sfn_client=sfn_client, sfn_role_arn=sfn_role_arn) execution_input = { 'image_uri': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3', 'instance_count': 1, 'entrypoint': ['python3', '/opt/ml/processing/input/code/preprocessor.py'], 'role': sagemaker_role_arn, 'volume_size_in_gb': 30, 'max_runtime_in_seconds': 500, 'container_arguments': ['--train-test-split-ratio', '0.2'] } # Execute workflow execution = workflow.execute(inputs=execution_input) execution_output = execution.get_output(wait=True) # Check workflow output assert execution_output.get("ProcessingJobStatus") == "Completed" # Cleanup state_machine_delete_wait(sfn_client, workflow.state_machine_arn)