Esempio n. 1
0
    def arguments(self) -> RequestType:
        """Build the arguments dictionary that is used to call `create_processing_job`.

        When `self.step_args` is set it is used as the request. Otherwise the
        request is assembled from `self.processor`: the step's inputs/outputs are
        normalized, turned into process args, and passed to the session's
        `_get_process_request`.

        NOTE: The `CreateProcessingJob` request is not quite the args list that workflow needs.
        `ProcessingJobName` and `ExperimentConfig` cannot be included in the arguments.

        Returns:
            A new dict with the request contents minus the `ProcessingJobName`
            and `ExperimentConfig` keys. `self.step_args` is left unmodified.
        """
        if self.step_args:
            request_dict = self.step_args
        else:
            normalized_inputs, normalized_outputs = self.processor._normalize_args(
                job_name=self.job_name,
                arguments=self.job_arguments,
                inputs=self.inputs,
                outputs=self.outputs,
                code=self.code,
                kms_key=self.kms_key,
            )
            process_args = ProcessingJob._get_process_args(
                self.processor,
                normalized_inputs,
                normalized_outputs,
                experiment_config={},
            )
            request_dict = self.processor.sagemaker_session._get_process_request(
                **process_args)

        # Filter into a fresh dict instead of popping in place, so that reading
        # the arguments never strips keys from the stored `step_args`.
        excluded_keys = ("ProcessingJobName", "ExperimentConfig")
        return {
            key: value
            for key, value in request_dict.items()
            if key not in excluded_keys
        }
Esempio n. 2
0
def test_processing_job_from_processing_arn(sagemaker_session):
    """A job built from its ARN must match the entries of PROCESSING_JOB_DESCRIPTION."""
    job_arn = "arn:aws:sagemaker:dummy-region:dummy-account-number:processing-job/dummy-job-name"
    processing_job = ProcessingJob.from_processing_arn(
        sagemaker_session=sagemaker_session,
        processing_job_arn=job_arn,
    )
    assert isinstance(processing_job, ProcessingJob)

    # Inputs are compared in their request-dict form.
    input_requests = [item._to_request_dict() for item in processing_job.inputs]
    assert input_requests == PROCESSING_JOB_DESCRIPTION["ProcessingInputs"]

    # Outputs are compared in their request-dict form as well.
    output_requests = [item._to_request_dict() for item in processing_job.outputs]
    assert output_requests == PROCESSING_JOB_DESCRIPTION["ProcessingOutputConfig"]["Outputs"]

    kms_key = processing_job.output_kms_key
    assert kms_key == PROCESSING_JOB_DESCRIPTION["ProcessingOutputConfig"]["KmsKeyId"]
def test_processing_job_from_processing_arn(sagemaker_session):
    """A job built from its ARN must match the describe-response helper's data."""
    job_arn = "arn:aws:sagemaker:dummy-region:dummy-account-number:processing-job/dummy-job-name"
    processing_job = ProcessingJob.from_processing_arn(
        sagemaker_session=sagemaker_session,
        processing_job_arn=job_arn,
    )

    assert isinstance(processing_job, ProcessingJob)

    # Inputs are compared in their request-dict form.
    input_requests = [item._to_request_dict() for item in processing_job.inputs]
    assert input_requests == _get_describe_response_inputs_and_ouputs()["ProcessingInputs"]

    # Outputs are compared in their request-dict form as well.
    output_requests = [item._to_request_dict() for item in processing_job.outputs]
    expected_outputs = _get_describe_response_inputs_and_ouputs()["ProcessingOutputConfig"]["Outputs"]
    assert output_requests == expected_outputs

    kms_key = processing_job.output_kms_key
    expected_kms_key = _get_describe_response_inputs_and_ouputs()["ProcessingOutputConfig"]["KmsKeyId"]
    assert kms_key == expected_kms_key
Esempio n. 4
0
    def arguments(self) -> RequestType:
        """Return the arguments dict that is used to define the QualityCheck step.

        The request is built from the baselining processor: the baseline job
        inputs and the single baseline output are normalized, converted into
        process args, and handed to the session's `_get_process_request`.
        `ProcessingJobName` is removed from the result when present.
        """
        processor = self._baselining_processor
        inputs, outputs = processor._normalize_args(
            inputs=self._baseline_job_inputs,
            outputs=[self._baseline_output],
        )
        process_args = ProcessingJob._get_process_args(
            processor, inputs, outputs, experiment_config={}
        )
        request = processor.sagemaker_session._get_process_request(**process_args)

        # Drop the job name if the request carries one; absent is fine too.
        request.pop("ProcessingJobName", None)
        return request
Esempio n. 5
0
    def arguments(self) -> RequestType:
        """Build the arguments dict that is used to call `create_processing_job`.

        The step's job arguments, inputs, outputs and code are normalized through
        the processor, converted into process args, and passed to the session's
        `_get_process_request`.

        NOTE: The CreateProcessingJob request is not quite the args list that workflow needs.
        ProcessingJobName and ExperimentConfig cannot be included in the arguments.

        Returns:
            The request dict with `ProcessingJobName` and `ExperimentConfig`
            removed if they were present.
        """
        normalized_inputs, normalized_outputs = self.processor._normalize_args(
            arguments=self.job_arguments,
            inputs=self.inputs,
            outputs=self.outputs,
            code=self.code,
        )
        process_args = ProcessingJob._get_process_args(
            self.processor, normalized_inputs, normalized_outputs, experiment_config={}
        )
        request_dict = self.processor.sagemaker_session._get_process_request(**process_args)

        # Remove both keys the workflow must not receive. Popping with a default
        # keeps this from raising KeyError when a key is absent from the request.
        request_dict.pop("ProcessingJobName", None)
        request_dict.pop("ExperimentConfig", None)

        return request_dict
Esempio n. 6
0
def test_script_processor_with_no_inputs_or_outputs(sagemaker_session,
                                                    image_uri,
                                                    cpu_instance_type):
    """Run a ScriptProcessor with only a code script and verify its description.

    The same set of assertions is applied twice: once to the description of the
    processor's latest job, and once to the description of a ProcessingJob
    looked up again by name.
    """

    def check_description(description):
        # Shared assertions for both descriptions of the job.
        assert description["ProcessingInputs"][0]["InputName"] == "code"

        assert description["ProcessingJobName"].startswith(
            "test-script-processor-with-no-inputs")

        assert description["ProcessingJobStatus"] == "Completed"

        cluster_config = description["ProcessingResources"]["ClusterConfig"]
        assert cluster_config["InstanceCount"] == 1
        assert cluster_config["InstanceType"] == cpu_instance_type
        assert cluster_config["VolumeSizeInGB"] == 100

        assert description["AppSpecification"]["ContainerArguments"] == ["-v"]
        assert description["AppSpecification"]["ContainerEntrypoint"] == [
            "python3",
            "/opt/ml/processing/input/code/dummy_script.py",
        ]
        assert description["AppSpecification"]["ImageUri"] == image_uri

        assert description["Environment"] == {
            "DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"
        }

        assert ROLE in description["RoleArn"]

        assert description["StoppingCondition"] == {
            "MaxRuntimeInSeconds": 3600
        }

    dummy_tags = [{"Key": "dummy-tag", "Value": "dummy-tag-value"}]
    script_processor = ScriptProcessor(
        role=ROLE,
        image_uri=image_uri,
        command=["python3"],
        instance_count=1,
        instance_type=cpu_instance_type,
        volume_size_in_gb=100,
        volume_kms_key=None,
        max_runtime_in_seconds=3600,
        base_job_name="test-script-processor-with-no-inputs-or-outputs",
        env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"},
        tags=dummy_tags,
        sagemaker_session=sagemaker_session,
    )

    script_path = os.path.join(DATA_DIR, "dummy_script.py")
    script_processor.run(code=script_path, arguments=["-v"], wait=True, logs=True)

    # First pass: description taken from the processor's latest job.
    job_description = script_processor.latest_job.describe()
    check_description(job_description)

    # Second pass: look the job up by name and describe it again.
    job_from_name = ProcessingJob.from_processing_name(
        sagemaker_session=sagemaker_session,
        processing_job_name=job_description["ProcessingJobName"],
    )
    job_description = job_from_name.describe()
    check_description(job_description)