def arguments(self) -> RequestType:
    """Return the request arguments used to call `create_processing_job`.

    NOTE: The `CreateProcessingJob` request is not quite the args list that
    workflow needs. `ProcessingJobName` and `ExperimentConfig` cannot be
    included in the arguments, so both keys are dropped before returning.

    When `step_args` is set it is used as the request as-is; otherwise the
    request is assembled from the processor and the inputs, outputs, code,
    job arguments, job name and KMS key stored on this step.
    """
    request = self.step_args
    if not request:
        processor = self.processor
        job_inputs, job_outputs = processor._normalize_args(
            job_name=self.job_name,
            arguments=self.job_arguments,
            inputs=self.inputs,
            outputs=self.outputs,
            code=self.code,
            kms_key=self.kms_key,
        )
        job_kwargs = ProcessingJob._get_process_args(
            processor, job_inputs, job_outputs, experiment_config={}
        )
        request = processor.sagemaker_session._get_process_request(**job_kwargs)

    # Removal happens in place on whichever dictionary was selected above.
    for excluded_key in ("ProcessingJobName", "ExperimentConfig"):
        request.pop(excluded_key, None)
    return request
def test_processing_job_from_processing_arn(sagemaker_session):
    """A job rebuilt from its ARN must match PROCESSING_JOB_DESCRIPTION."""
    job = ProcessingJob.from_processing_arn(
        sagemaker_session=sagemaker_session,
        processing_job_arn="arn:aws:sagemaker:dummy-region:dummy-account-number:processing-job/dummy-job-name",
    )
    assert isinstance(job, ProcessingJob)

    # Inputs and outputs are compared in their request-dictionary form.
    rebuilt_inputs = [entry._to_request_dict() for entry in job.inputs]
    assert rebuilt_inputs == PROCESSING_JOB_DESCRIPTION["ProcessingInputs"]

    rebuilt_outputs = [entry._to_request_dict() for entry in job.outputs]
    assert rebuilt_outputs == PROCESSING_JOB_DESCRIPTION["ProcessingOutputConfig"]["Outputs"]

    actual_kms_key = job.output_kms_key
    assert actual_kms_key == PROCESSING_JOB_DESCRIPTION["ProcessingOutputConfig"]["KmsKeyId"]
def test_processing_job_from_processing_arn(sagemaker_session):
    """A job rebuilt from its ARN must match the describe-response fixture."""
    job = ProcessingJob.from_processing_arn(
        sagemaker_session=sagemaker_session,
        processing_job_arn="arn:aws:sagemaker:dummy-region:dummy-account-number:processing-job/dummy-job-name",
    )
    assert isinstance(job, ProcessingJob)

    # Inputs and outputs are compared in their request-dictionary form; the
    # fixture helper is invoked separately for every comparison.
    rebuilt_inputs = [entry._to_request_dict() for entry in job.inputs]
    assert rebuilt_inputs == _get_describe_response_inputs_and_ouputs()["ProcessingInputs"]

    rebuilt_outputs = [entry._to_request_dict() for entry in job.outputs]
    assert (
        rebuilt_outputs
        == _get_describe_response_inputs_and_ouputs()["ProcessingOutputConfig"]["Outputs"]
    )

    actual_kms_key = job.output_kms_key
    assert (
        actual_kms_key
        == _get_describe_response_inputs_and_ouputs()["ProcessingOutputConfig"]["KmsKeyId"]
    )
def arguments(self) -> RequestType:
    """Return the arguments dict that is used to define the QualityCheck step.

    The request is built from the baselining processor, using the baseline
    job inputs and the single baseline output stored on this step. Any
    `ProcessingJobName` entry is removed from the result.
    """
    processor = self._baselining_processor
    job_inputs, job_outputs = processor._normalize_args(
        inputs=self._baseline_job_inputs,
        outputs=[self._baseline_output],
    )
    job_kwargs = ProcessingJob._get_process_args(
        processor,
        job_inputs,
        job_outputs,
        experiment_config={},
    )
    request = processor.sagemaker_session._get_process_request(**job_kwargs)

    # Strip the job name when present; a missing key is not an error.
    request.pop("ProcessingJobName", None)
    return request
def arguments(self) -> RequestType:
    """The arguments dict that is used to call `create_processing_job`.

    NOTE: The CreateProcessingJob request is not quite the args list that
    workflow needs. ProcessingJobName and ExperimentConfig cannot be included
    in the arguments.

    Returns:
        The request dictionary produced by the session's
        `_get_process_request`, with the `ProcessingJobName` and
        `ExperimentConfig` keys removed when they are present.
    """
    normalized_inputs, normalized_outputs = self.processor._normalize_args(
        arguments=self.job_arguments,
        inputs=self.inputs,
        outputs=self.outputs,
        code=self.code,
    )
    process_args = ProcessingJob._get_process_args(
        self.processor, normalized_inputs, normalized_outputs, experiment_config={}
    )
    request_dict = self.processor.sagemaker_session._get_process_request(**process_args)

    # Both keys named in the docstring are removed. A default is supplied so
    # that a request lacking either key does not raise KeyError.
    request_dict.pop("ProcessingJobName", None)
    request_dict.pop("ExperimentConfig", None)
    return request_dict
def test_script_processor_with_no_inputs_or_outputs(
    sagemaker_session, image_uri, cpu_instance_type
):
    """Run a ScriptProcessor without user inputs/outputs and verify the job.

    The same set of checks is applied twice: once to the description of the
    job the processor just ran, and once to the description obtained by
    looking that job up again by name.
    """

    def _verify_description(description):
        # Shared assertions for both descriptions, in a fixed order.
        assert description["ProcessingInputs"][0]["InputName"] == "code"
        assert description["ProcessingJobName"].startswith("test-script-processor-with-no-inputs")
        assert description["ProcessingJobStatus"] == "Completed"

        cluster_config = description["ProcessingResources"]["ClusterConfig"]
        assert cluster_config["InstanceCount"] == 1
        assert cluster_config["InstanceType"] == cpu_instance_type
        assert cluster_config["VolumeSizeInGB"] == 100

        app_spec = description["AppSpecification"]
        assert app_spec["ContainerArguments"] == ["-v"]
        assert app_spec["ContainerEntrypoint"] == [
            "python3",
            "/opt/ml/processing/input/code/dummy_script.py",
        ]
        assert app_spec["ImageUri"] == image_uri

        assert description["Environment"] == {"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"}
        assert ROLE in description["RoleArn"]
        assert description["StoppingCondition"] == {"MaxRuntimeInSeconds": 3600}

    processor = ScriptProcessor(
        role=ROLE,
        image_uri=image_uri,
        command=["python3"],
        instance_count=1,
        instance_type=cpu_instance_type,
        volume_size_in_gb=100,
        volume_kms_key=None,
        max_runtime_in_seconds=3600,
        base_job_name="test-script-processor-with-no-inputs-or-outputs",
        env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"},
        tags=[{"Key": "dummy-tag", "Value": "dummy-tag-value"}],
        sagemaker_session=sagemaker_session,
    )

    script_path = os.path.join(DATA_DIR, "dummy_script.py")
    processor.run(code=script_path, arguments=["-v"], wait=True, logs=True)

    first_description = processor.latest_job.describe()
    _verify_description(first_description)

    # Look the job up again by name and expect an equivalent description.
    reloaded_job = ProcessingJob.from_processing_name(
        sagemaker_session=sagemaker_session,
        processing_job_name=first_description["ProcessingJobName"],
    )
    _verify_description(reloaded_job.describe())