Ejemplo n.º 1
0
    def test_create_schedule_when_cloud_function_already_exists(self):
        """Checks the scheduler job targets the URL of an existing function.

        The Cloud Functions API is replaced by a stub whose ``get`` request
        reports an HTTPS trigger URL; the test then verifies that the job body
        handed to ``_create_scheduler_job`` uses that URL as its HTTP target.
        """
        pipeline_path = os.path.join(
            os.path.dirname(__file__), 'testdata', 'pipeline1.json')

        project_id = 'project-id'
        location = 'us-central1'
        url_template = ('https://{}-{}.cloudfunctions.net/' +
                        'templated_http_request-v1')
        function_url = url_template.format(location, project_id)

        def fake_function_get(name):  # pylint: disable=unused-argument
            # Every lookup answers with a function exposing the trigger URL.
            response = mock.Mock()
            response.execute.return_value = {
                'httpsTrigger': {
                    'url': function_url
                }
            }
            return response

        def fake_cloud_functions_api():
            api_stub = mock.Mock()
            api_stub.get = fake_function_get
            return api_stub

        # Patchers are created up front and entered in the same order below.
        enable_apis_patch = mock.patch(
            'kfp.v2.google.client.schedule._enable_required_apis',
            return_value=None,
        )
        functions_api_patch = mock.patch(
            'kfp.v2.google.client.schedule._get_cloud_functions_api',
            new=fake_cloud_functions_api,
        )
        scheduler_job_patch = mock.patch(
            'kfp.v2.google.client.schedule._create_scheduler_job',
            spec=True)

        with enable_apis_patch, functions_api_patch, \
                scheduler_job_patch as scheduler_job_mock:
            schedule.create_from_pipeline_file(
                pipeline_path=pipeline_path,
                schedule='46 * * * *',
                project_id=project_id,
                region=location,
                time_zone='America/Los_Angeles',
            )
            scheduler_job_mock.assert_called_once()
            # call_args is an (args, kwargs) pair; only the kwargs matter here.
            _, call_kwargs = scheduler_job_mock.call_args
            job_body = call_kwargs['job_body']
            self.assertEqual(job_body['http_target']['uri'], function_url)
Ejemplo n.º 2
0
    def create_schedule_from_job_spec(
        self,
        job_spec_path: str,
        schedule: str,
        time_zone: str = 'US/Pacific',
        pipeline_root: Optional[str] = None,
        parameter_values: Optional[Mapping[str, Any]] = None,
        service_account: Optional[str] = None,
    ) -> dict:
        """Creates a schedule for a compiled pipeline file.

        Sets up a scheduled job that runs the given pipeline on the requested
        schedule. The schedule is implemented as a Google Cloud Scheduler Job,
        which is visible in https://console.google.com/cloudscheduler and can
        be paused/resumed and deleted there.

        To make the system work, a Google Cloud Function is also created; it
        acts as an intermediary between the Scheduler and Pipelines. A single
        function is shared between all scheduled jobs.

        The following APIs will be activated automatically:
        * cloudfunctions.googleapis.com
        * cloudscheduler.googleapis.com
        * appengine.googleapis.com

        The project and region are taken from this client instance.

        Args:
          job_spec_path: Path of the compiled pipeline file.
          schedule: Schedule in cron format. Example: "45 * * * *"
          time_zone: Schedule time zone. Default is 'US/Pacific'
          pipeline_root: Optionally the user can override the pipeline root
            specified during the compile time.
          parameter_values: Arguments for the pipeline parameters
          service_account: The service account that the pipeline workload
            runs as.

        Returns:
          Created Google Cloud Scheduler Job object dictionary.
        """
        # Gather everything the module-level helper needs, then delegate.
        schedule_kwargs = dict(
            pipeline_path=job_spec_path,
            schedule=schedule,
            project_id=self._project_id,
            region=self._region,
            time_zone=time_zone,
            parameter_values=parameter_values,
            pipeline_root=pipeline_root,
            service_account=service_account,
        )
        return create_from_pipeline_file(**schedule_kwargs)
Ejemplo n.º 3
0
    def test_create_from_pipeline_file(self):
        """Checks the scheduler job body built from a compiled pipeline file.

        API enabling and the proxy Cloud Function endpoint lookup are patched
        out; the test then compares the arguments given to
        ``_create_scheduler_job`` with a job body derived from the expected
        request payload stored in the test data directory.
        """
        testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')
        pipeline_path = os.path.join(testdata_dir, 'pipeline1.json')
        request_body_path = os.path.join(testdata_dir,
                                         'pipeline1_request_body.json')

        project_id = 'project-id'
        location = 'us-central1'
        url_template = ('https://{}-{}.cloudfunctions.net/' +
                        'templated_http_request-v1')
        function_url = url_template.format(location, project_id)

        # Expected payload: the stored request body re-serialized to JSON,
        # with the first 8 hex digits of its SHA-256 used in the job name.
        with open(request_body_path, 'rb') as body_file:
            request_body = json.load(body_file)
        body_bytes = json.dumps(request_body).encode('utf-8')
        body_hash = hashlib.sha256(body_bytes).hexdigest()[0:8]

        expected_job_name = (
            'projects/{}/locations/{}/jobs/pipeline_my-pipeline_{}_46-a-a-a-a'
            .format(project_id, location, body_hash))
        expected_job_body = {
            'name': expected_job_name,
            'schedule': '46 * * * *',
            'time_zone': 'America/Los_Angeles',
            'http_target': {
                'http_method': 'POST',
                'uri': function_url,
                'body': base64.b64encode(body_bytes).decode('utf-8'),
                'oidc_token': {
                    'service_account_email': '*****@*****.**',
                },
            },
        }

        enable_apis_patch = mock.patch(
            'kfp.v2.google.client.schedule._enable_required_apis',
            return_value=None,
        )
        endpoint_patch = mock.patch(
            'kfp.v2.google.client.schedule._get_proxy_cloud_function_endpoint',
            return_value=function_url,
        )
        scheduler_job_patch = mock.patch(
            'kfp.v2.google.client.schedule._create_scheduler_job',
            spec=True)

        with enable_apis_patch, endpoint_patch, \
                scheduler_job_patch as scheduler_job_mock:
            schedule.create_from_pipeline_file(
                pipeline_path=pipeline_path,
                schedule='46 * * * *',
                project_id=project_id,
                region=location,
                time_zone='America/Los_Angeles',
                parameter_values={'name_param': 'World'},
                pipeline_root='gs://my-project/pipeline_root/tmp/',
            )

            scheduler_job_mock.assert_called_with(
                project_location_path='projects/{}/locations/{}'.format(
                    project_id, location),
                job_body=expected_job_body,
            )