Example #1
    def _run_pipeline(
            self,
            pipeline: tfx_pipeline.Pipeline,
            exit_handler: Optional[base_node.BaseNode] = None) -> None:
        """Trigger the pipeline execution with a specific job ID."""
        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        self.addCleanup(self._delete_pipeline_output,
                        pipeline.pipeline_info.pipeline_name)

        config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
            default_image=self.container_image)

        executing_kubeflow_v2_dag_runner = kubeflow_v2_dag_runner.KubeflowV2DagRunner(
            config=config, output_filename='pipeline.json')
        if exit_handler:
            executing_kubeflow_v2_dag_runner.set_exit_handler(exit_handler)

        _ = executing_kubeflow_v2_dag_runner.run(pipeline, write_out=True)

        job_id = pipeline.pipeline_info.pipeline_name
        job = pipeline_jobs.PipelineJob(
            template_path='pipeline.json',
            job_id=job_id,
            display_name=pipeline.pipeline_info.pipeline_name)
        job.run(sync=False)
        job.wait_for_resource_creation()

        vertex_client_utils.poll_job_status(job_id, _MAX_JOB_EXECUTION_TIME,
                                            _POLLING_INTERVAL_IN_SECONDS)
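
A minimal sketch of how a test might call the _run_pipeline helper above. The test name, components, and pipeline root are hypothetical placeholders, not part of the original source:

    def test_two_step_pipeline(self):
        # Hypothetical components; the real test wires up whatever the
        # pipeline under test needs.
        pipeline = tfx_pipeline.Pipeline(
            pipeline_name='my-test-pipeline',    # placeholder name
            pipeline_root=self._pipeline_root,   # assumed test fixture
            components=[example_gen, statistics_gen])

        # Blocks in poll_job_status until the Vertex job reaches a terminal
        # state or _MAX_JOB_EXECUTION_TIME elapses.
        self._run_pipeline(pipeline)
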
    def test_pipeline_failure_raises(self, mock_load_yaml_and_json, sync):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES,
            enable_caching=True,
        )

        with pytest.raises(RuntimeError):
            job.run(
                service_account=_TEST_SERVICE_ACCOUNT,
                network=_TEST_NETWORK,
                sync=sync,
            )

            if not sync:
                job.wait()

    def test_done_method_pipeline_service(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        job_spec,
        mock_load_yaml_and_json,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES,
            enable_caching=True,
        )

        job.submit(service_account=_TEST_SERVICE_ACCOUNT,
                   network=_TEST_NETWORK)

        assert job.done() is False

        job.wait()

        assert job.done() is True
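
Outside of a mocked test, the same done() API supports a plain polling loop. A sketch, assuming job was created and submitted as above:

    import time

    # done() returns True once the job reaches a terminal state
    # (succeeded, failed, or cancelled).
    while not job.done():
        time.sleep(30)
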
Example #4
    def test_run_call_pipeline_service_create(
        self, mock_pipeline_service_create, mock_pipeline_service_get, sync,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES,
            enable_caching=True,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, sync=sync,
        )

        if not sync:
            job.wait()

        expected_runtime_config_dict = {
            "gcs_output_directory": _TEST_GCS_BUCKET_NAME,
            "parameters": {"name_param": {"stringValue": "hello"}},
        }
        runtime_config = gca_pipeline_job_v1beta1.PipelineJob.RuntimeConfig()._pb
        json_format.ParseDict(expected_runtime_config_dict, runtime_config)

        # Construct expected request
        expected_gapic_pipeline_job = gca_pipeline_job_v1beta1.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            name=_TEST_PIPELINE_JOB_NAME,
            pipeline_spec={
                "components": {},
                "pipelineInfo": _TEST_PIPELINE_JOB_SPEC["pipelineSpec"]["pipelineInfo"],
                "root": _TEST_PIPELINE_JOB_SPEC["pipelineSpec"]["root"],
            },
            runtime_config=runtime_config,
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=_TEST_PARENT,
            pipeline_job=expected_gapic_pipeline_job,
            pipeline_job_id=_TEST_PIPELINE_JOB_ID,
        )

        mock_pipeline_service_get.assert_called_with(name=_TEST_PIPELINE_JOB_NAME)

        assert job._gca_resource == make_pipeline_job(
            gca_pipeline_state_v1beta1.PipelineState.PIPELINE_STATE_SUCCEEDED
        )

    def test_cancel_pipeline_job_without_running(
        self, mock_pipeline_service_cancel, mock_load_json,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
        )

        with pytest.raises(RuntimeError) as e:
            job.cancel()

        assert e.match(regexp=r"PipelineJob resource has not been created")

    def test_list_pipeline_job(self, mock_pipeline_service_list, mock_load_json):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
        )

        job.run()
        job.list()

        mock_pipeline_service_list.assert_called_once_with(
            request={"parent": _TEST_PARENT, "filter": None}
        )
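
PipelineJob.list also accepts server-side filtering; a sketch with an illustrative filter string:

    jobs = pipeline_jobs.PipelineJob.list(
        filter='display_name="my-pipeline"',  # illustrative value
        order_by='create_time desc',
    )
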
    def test_cancel_pipeline_job(self, mock_pipeline_service_cancel, mock_load_json):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
        )

        job.run()
        job.cancel()

        mock_pipeline_service_cancel.assert_called_once_with(
            name=_TEST_PIPELINE_JOB_NAME
        )

Example #8
    def create_run(self) -> None:
        """Runs a pipeline in Vertex Pipelines."""
        self._prepare_vertex()
        pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

        # In Vertex AI, runtime parameter string values are parsed on the
        # server side, so the client sends the Dict[str, str] values directly.
        unparsed_runtime_parameters = self.flags_dict[labels.RUNTIME_PARAMETER]

        job = pipeline_jobs.PipelineJob(
            display_name=pipeline_name,
            template_path=self._get_pipeline_definition_path(pipeline_name),
            parameter_values=unparsed_runtime_parameters)
        # TODO(b/198114641): Delete pytype exception after upgrading source code
        # to aiplatform>=1.3.
        job.run(sync=False)  # pytype: disable=attribute-error
        job.wait_for_resource_creation()  # pytype: disable=attribute-error

        click.echo('Run created for pipeline: ' + pipeline_name)
        self._print_run(job)
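
As the comment in create_run notes, Vertex AI parses parameter strings server-side, so the handler forwards the flag values untouched. An illustrative sketch of that Dict[str, str] shape (keys and values are made up):

    unparsed_runtime_parameters = {
        'train_steps': '1000',               # stays a string; parsed server-side
        'data_root': 'gs://my-bucket/data',
    }
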
    def test_submit_call_pipeline_service_pipeline_job_create_legacy(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        job_spec_json,
        mock_load_json,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES_LEGACY,
            enable_caching=True,
        )

        job.submit(service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK)

        expected_runtime_config_dict = {
            "parameters": {"string_param": {"stringValue": "hello"}},
            "gcsOutputDirectory": _TEST_GCS_BUCKET_NAME,
        }
        runtime_config = gca_pipeline_job_v1.PipelineJob.RuntimeConfig()._pb
        json_format.ParseDict(expected_runtime_config_dict, runtime_config)

        pipeline_spec = job_spec_json.get("pipelineSpec") or job_spec_json

        # Construct expected request
        expected_gapic_pipeline_job = gca_pipeline_job_v1.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            pipeline_spec={
                "components": {},
                "pipelineInfo": pipeline_spec["pipelineInfo"],
                "root": pipeline_spec["root"],
                "schemaVersion": "2.0.0",
            },
            runtime_config=runtime_config,
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=_TEST_PARENT,
            pipeline_job=expected_gapic_pipeline_job,
            pipeline_job_id=_TEST_PIPELINE_JOB_ID,
        )

        assert not mock_pipeline_service_get.called

        job.wait()

        mock_pipeline_service_get.assert_called_with(
            name=_TEST_PIPELINE_JOB_NAME, retry=base._DEFAULT_RETRY
        )

        assert job._gca_resource == make_pipeline_job(
            gca_pipeline_state_v1.PipelineState.PIPELINE_STATE_SUCCEEDED
        )
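
The legacy test above and the test below illustrate the runtime-config split between pipeline spec schema versions: 2.0.0 wraps each parameter in a typed value message under "parameters", while 2.1.0 passes plain JSON values under "parameterValues". Side by side:

    # Same logical parameter, two wire formats (schema 2.0.0 vs 2.1.0).
    legacy_runtime = {"parameters": {"string_param": {"stringValue": "hello"}}}
    modern_runtime = {"parameterValues": {"string_param": "hello"}}
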
    def test_run_call_pipeline_service_create_with_timeout(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        job_spec,
        mock_load_yaml_and_json,
        sync,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES,
            enable_caching=True,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            sync=sync,
            create_request_timeout=180.0,
        )

        if not sync:
            job.wait()

        expected_runtime_config_dict = {
            "gcsOutputDirectory": _TEST_GCS_BUCKET_NAME,
            "parameterValues": _TEST_PIPELINE_PARAMETER_VALUES,
        }
        runtime_config = gca_pipeline_job_v1.PipelineJob.RuntimeConfig()._pb
        json_format.ParseDict(expected_runtime_config_dict, runtime_config)

        job_spec = yaml.safe_load(job_spec)
        pipeline_spec = job_spec.get("pipelineSpec") or job_spec

        # Construct expected request
        expected_gapic_pipeline_job = gca_pipeline_job_v1.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            pipeline_spec={
                "components": {},
                "pipelineInfo": pipeline_spec["pipelineInfo"],
                "root": pipeline_spec["root"],
                "schemaVersion": "2.1.0",
            },
            runtime_config=runtime_config,
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=_TEST_PARENT,
            pipeline_job=expected_gapic_pipeline_job,
            pipeline_job_id=_TEST_PIPELINE_JOB_ID,
            timeout=180.0,
        )
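
Note that create_request_timeout bounds only the CreatePipelineJob RPC, not the pipeline's execution time. A sketch of the same knob on submit(), assuming an aiplatform version whose submit() accepts the keyword:

    job.submit(
        service_account=_TEST_SERVICE_ACCOUNT,
        network=_TEST_NETWORK,
        create_request_timeout=180.0,  # seconds; applies to the create RPC only
    )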