Beispiel #1
0
  def testStartAIPTrainingWithUserContainer(self, mock_discovery):
    """Checks the job-creation request when a custom master image is set."""
    class_path = 'foo.bar.class'
    self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}

    # Wire the discovery client so that jobs().create and jobs().get are
    # observable from this test.
    mock_discovery.build.return_value = self._mock_api_client
    create_job = mock.Mock()
    self._mock_api_client.projects().jobs().create = create_job
    get_request = mock.Mock()
    get_request.execute.return_value = {
        'state': 'SUCCEEDED',
    }
    self._mock_api_client.projects().jobs().get.return_value = get_request

    runner.start_aip_training(
        self._inputs, self._outputs, self._exec_properties, class_path,
        self._training_inputs)

    expected_parent = 'projects/{}'.format(self._project_id)
    create_job.assert_called_with(body=mock.ANY, parent=expected_parent)
    request_body = create_job.call_args[1]['body']

    expected_training_input = {
        'masterConfig': {
            'imageUri': 'my-custom-image',
        },
        'args': [
            '--executor_class_path', class_path,
            '--inputs', '{}',
            '--outputs', '{}',
            '--exec-properties', '{"custom_config": {}}',
        ],
    }
    self.assertDictContainsSubset(expected_training_input,
                                  request_body['trainingInput'])
    self.assertStartsWith(request_body['jobId'], 'tfx_')
    get_request.execute.assert_called_with()
Beispiel #2
0
  def testStartAIPTraining(self, mock_discovery):
    """Checks the job-creation request built with the default TFX image."""
    class_path = 'foo.bar.class'
    mock_discovery.build.return_value = self._mock_api_client
    self._setUpTrainingMocks()

    runner.start_aip_training(
        self._inputs, self._outputs,
        self._serialize_custom_config_under_test(), class_path,
        self._training_inputs, None)

    expected_parent = 'projects/{}'.format(self._project_id)
    self._mock_create.assert_called_with(
        body=mock.ANY, parent=expected_parent)
    request_body = self._mock_create.call_args[1]['body']

    default_image = 'gcr.io/tfx-oss-public/tfx:{}'.format(version.__version__)
    # custom_config is expected as a JSON string nested inside the
    # exec-properties JSON, hence the escaped quotes.
    serialized_exec_properties = (
        '{"custom_config": '
        '"{\\"ai_platform_training_args\\": {\\"project\\": \\"12345\\"'
        '}}"}')
    expected_command = runner._CONTAINER_COMMAND + [
        '--executor_class_path', class_path,
        '--inputs', '{}',
        '--outputs', '{}',
        '--exec-properties', serialized_exec_properties,
    ]
    expected_training_input = {
        'masterConfig': {
            'imageUri': default_image,
            'containerCommand': expected_command,
        },
    }
    self.assertDictContainsSubset(expected_training_input,
                                  request_body['trainingInput'])
    self.assertStartsWith(request_body['jobId'], 'tfx_')
    self._mock_get.execute.assert_called_with()
Beispiel #3
0
    def testStartAIPTraining(self, mock_discovery):
        """Checks the job-creation request built with the default TFX image."""
        class_path = 'foo.bar.class'
        mock_discovery.build.return_value = self._mock_api_client
        self._setUpTrainingMocks()

        runner.start_aip_training(
            self._inputs, self._outputs, self._exec_properties, class_path,
            self._training_inputs, None)

        expected_parent = 'projects/{}'.format(self._project_id)
        self._mock_create.assert_called_with(body=mock.ANY,
                                             parent=expected_parent)
        request_body = self._mock_create.call_args[1]['body']

        default_image = 'gcr.io/tfx-oss-public/tfx:%s' % (version.__version__)
        serialized_exec_properties = (
            '{"custom_config": '
            '{"ai_platform_training_args": {"project": "12345"}}}')
        expected_training_input = {
            'masterConfig': {
                'imageUri': default_image,
            },
            'args': [
                '--executor_class_path', class_path,
                '--inputs', '{}',
                '--outputs', '{}',
                '--exec-properties', serialized_exec_properties,
            ],
        }
        self.assertDictContainsSubset(expected_training_input,
                                      request_body['trainingInput'])
        self.assertStartsWith(request_body['jobId'], 'tfx_')
        self._mock_get.execute.assert_called_with()
Beispiel #4
0
    def testStartAIPTrainingWithUserContainer(self, mock_discovery):
        """Checks the request when a custom image and explicit job id are set."""
        class_path = 'foo.bar.class'
        mock_discovery.build.return_value = self._mock_api_client
        self._setUpTrainingMocks()

        # Configure a user container and an explicit job id before launching.
        self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}
        custom_config = self._exec_properties['custom_config']
        custom_config[executor.JOB_ID_KEY] = self._job_id

        runner.start_aip_training(
            self._inputs, self._outputs, self._exec_properties, class_path,
            self._training_inputs, self._job_id)

        expected_parent = 'projects/{}'.format(self._project_id)
        self._mock_create.assert_called_with(body=mock.ANY,
                                             parent=expected_parent)
        request_body = self._mock_create.call_args[1]['body']

        serialized_exec_properties = (
            '{"custom_config": '
            '{"ai_platform_training_args": '
            '{"masterConfig": {"imageUri": "my-custom-image"}, '
            '"project": "12345"}, '
            '"ai_platform_training_job_id": "my_jobid"}}')
        expected_training_input = {
            'masterConfig': {
                'imageUri': 'my-custom-image',
            },
            'args': [
                '--executor_class_path', class_path,
                '--inputs', '{}',
                '--outputs', '{}',
                '--exec-properties', serialized_exec_properties,
            ],
        }
        self.assertDictContainsSubset(expected_training_input,
                                      request_body['trainingInput'])
        self.assertEqual(request_body['jobId'], 'my_jobid')
        self._mock_get.execute.assert_called_with()
Beispiel #5
0
    def testStartAIPTrainingWithUserContainer_uCAIP(self, mock_gapic):
        """Checks the uCAIP custom-job request for a user-supplied image."""
        class_path = 'foo.bar.class'
        region = 'us-central2'
        mock_gapic.JobServiceClient.return_value = self._mock_api_client
        self._setUpUcaipTrainingMocks()

        # Configure a user container and an explicit job id before launching.
        self._training_inputs['worker_pool_specs'] = [{
            'container_spec': {
                'image_uri': 'my-custom-image'
            }
        }]
        custom_config = self._exec_properties['custom_config']
        custom_config[executor.JOB_ID_KEY] = self._job_id

        runner.start_aip_training(
            self._inputs, self._outputs,
            self._serialize_custom_config_under_test(), class_path,
            self._training_inputs, self._job_id, True, region)

        expected_parent = 'projects/{}/locations/{}'.format(
            self._project_id, region)
        self._mock_create.assert_called_with(parent=expected_parent,
                                             custom_job=mock.ANY)
        custom_job = self._mock_create.call_args[1]['custom_job']

        # custom_config is expected as a JSON string nested inside the
        # exec-properties JSON, hence the escaped quotes.
        serialized_exec_properties = (
            '{"custom_config": '
            '"{\\"ai_platform_training_args\\": '
            '{\\"project\\": \\"12345\\", '
            '\\"worker_pool_specs\\": '
            '[{\\"container_spec\\": '
            '{\\"image_uri\\": \\"my-custom-image\\"}}]}, '
            '\\"ai_platform_training_job_id\\": '
            '\\"my_jobid\\"}"}')
        expected_command = runner._CONTAINER_COMMAND + [
            '--executor_class_path', class_path,
            '--inputs', '{}',
            '--outputs', '{}',
            '--exec-properties', serialized_exec_properties,
        ]
        expected_job_spec = {
            'worker_pool_specs': [
                {
                    'container_spec': {
                        'image_uri': 'my-custom-image',
                        'command': expected_command,
                    },
                },
            ],
        }
        self.assertDictContainsSubset(expected_job_spec,
                                      custom_job['job_spec'])
        self.assertEqual(custom_job['display_name'], 'my_jobid')
        self._mock_get.assert_called_with(name='ucaip_job_study_id')
Beispiel #6
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Launches the Tuner as a job on Google Cloud AI Platform.

        Args:
          input_dict: Input artifacts; logged and handed to the runner as is.
          output_dict: Output artifacts; logged and handed to the runner as is.
          exec_properties: Execution properties. Its custom_config entry is
            parsed with json_utils.loads and must decode to a dict holding
            the AI Platform training args; it may also carry a job id.

        Returns:
          Whatever runner.start_aip_training returns.

        Raises:
          ValueError: If custom_config is missing, or the training args are
            not found inside it.
          TypeError: If custom_config does not decode to a dict.
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        serialized_config = exec_properties.get(constants.CUSTOM_CONFIG_KEY,
                                                'null')
        custom_config = json_utils.loads(serialized_config)
        if custom_config is None:
            raise ValueError('custom_config is not provided')
        if not isinstance(custom_config, Dict):
            raise TypeError(
                'custom_config in execution properties must be a dict, '
                'but received %s' % type(custom_config))

        training_args_key = ai_platform_trainer_executor.TRAINING_ARGS_KEY
        training_inputs = custom_config.get(training_args_key)
        if training_inputs is None:
            err_msg = '\'%s\' not found in custom_config.' % training_args_key
            logging.error(err_msg)
            raise ValueError(err_msg)

        tune_args = tuner_executor.get_tune_args(exec_properties)
        if tune_args:
            num_parallel_trials = tune_args.num_parallel_trials
        else:
            num_parallel_trials = 1

        if num_parallel_trials > 1:
            # The chief node also runs the tuning loop, so it counts as one
            # of the parallel trials.
            desired_worker_count = num_parallel_trials - 1
            if training_inputs.get('workerCount') != desired_worker_count:
                logging.warning('workerCount is overridden with %s',
                                desired_worker_count)
                training_inputs['workerCount'] = desired_worker_count

            training_inputs['scaleTier'] = 'CUSTOM'
            # Fall back to 'standard' machines where no type was given.
            for machine_type_key in ('masterType', 'workerType'):
                training_inputs[machine_type_key] = (
                    training_inputs.get(machine_type_key) or 'standard')

        # Default job ID is 'tfx_tuner_YYYYmmddHHMMSS' when none is supplied.
        job_id = custom_config.get(ai_platform_trainer_executor.JOB_ID_KEY)
        if not job_id:
            timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            job_id = 'tfx_tuner_{}'.format(timestamp)

        # TODO(b/160059039): Factor out label creation to a utility function.
        executor_class = _WorkerExecutor
        executor_class_path = '%s.%s' % (executor_class.__module__,
                                         executor_class.__name__)

        # exec_properties is forwarded as received; the worker overrides
        # applied above travel through training_inputs.
        return runner.start_aip_training(
            input_dict, output_dict, exec_properties, executor_class_path,
            training_inputs, job_id)
Beispiel #7
0
    def testStartAIPTraining_uCAIP(self, mock_gapic):
        """Checks the uCAIP custom-job request built with the default image."""
        class_path = 'foo.bar.class'
        region = 'us-central1'
        mock_gapic.JobServiceClient.return_value = self._mock_api_client
        self._setUpUcaipTrainingMocks()

        runner.start_aip_training(
            self._inputs, self._outputs,
            self._serialize_custom_config_under_test(), class_path,
            self._training_inputs, None, True, region)

        expected_parent = 'projects/{}/locations/{}'.format(
            self._project_id, region)
        self._mock_create.assert_called_with(parent=expected_parent,
                                             custom_job=mock.ANY)
        custom_job = self._mock_create.call_args[1]['custom_job']

        default_image = 'gcr.io/tfx-oss-public/tfx:{}'.format(
            version_utils.get_image_version())
        # custom_config is expected as a JSON string nested inside the
        # exec-properties JSON, hence the escaped quotes.
        serialized_exec_properties = (
            '{"custom_config": '
            '"{\\"ai_platform_training_args\\": '
            '{\\"project\\": \\"12345\\"'
            '}}"}')
        expected_command = runner._CONTAINER_COMMAND + [
            '--executor_class_path', class_path,
            '--inputs', '{}',
            '--outputs', '{}',
            '--exec-properties', serialized_exec_properties,
        ]
        expected_job_spec = {
            'worker_pool_specs': [
                {
                    'container_spec': {
                        'image_uri': default_image,
                        'command': expected_command,
                    },
                },
            ],
        }
        self.assertDictContainsSubset(expected_job_spec,
                                      custom_job['job_spec'])
        self.assertStartsWith(custom_job['display_name'], 'tfx_')
        self._mock_get.assert_called_with(name='ucaip_job_study_id')
Beispiel #8
0
    def testStartAIPTrainingWithUserContainer(self, mock_discovery):
        """Checks the request when a custom image and explicit job id are set."""
        class_path = 'foo.bar.class'
        mock_discovery.build.return_value = self._mock_api_client
        self._setUpTrainingMocks()

        # Configure a user container and an explicit job id before launching.
        self._training_inputs['masterConfig'] = {'imageUri': 'my-custom-image'}
        custom_config = self._exec_properties['custom_config']
        custom_config[executor.JOB_ID_KEY] = self._job_id

        runner.start_aip_training(
            self._inputs, self._outputs,
            self._serialize_custom_config_under_test(), class_path,
            self._training_inputs, self._job_id)

        expected_parent = 'projects/{}'.format(self._project_id)
        self._mock_create.assert_called_with(body=mock.ANY,
                                             parent=expected_parent)
        request_body = self._mock_create.call_args[1]['body']

        # custom_config is expected as a JSON string nested inside the
        # exec-properties JSON, hence the escaped quotes.
        serialized_exec_properties = (
            '{"custom_config": '
            '"{\\"ai_platform_training_args\\": '
            '{\\"masterConfig\\": {\\"imageUri\\": \\"my-custom-image\\"}, '
            '\\"project\\": \\"12345\\"}, '
            '\\"ai_platform_training_job_id\\": \\"my_jobid\\"}"}')
        expected_command = runner._CONTAINER_COMMAND + [
            '--executor_class_path', class_path,
            '--inputs', '{}',
            '--outputs', '{}',
            '--exec-properties', serialized_exec_properties,
        ]
        expected_training_input = {
            'masterConfig': {
                'imageUri': 'my-custom-image',
                'containerCommand': expected_command,
            }
        }
        self.assertDictContainsSubset(expected_training_input,
                                      request_body['trainingInput'])
        self.assertEqual(request_body['jobId'], 'my_jobid')
        self._mock_get.execute.assert_called_with()
        self._mock_create_request.execute.assert_called_with()
Beispiel #9
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Starts a trainer job on Google Cloud AI Platform.

        Args:
          input_dict: Passthrough input dict for
            tfx.components.Trainer.executor.
          output_dict: Passthrough output dict for
            tfx.components.Trainer.executor.
          exec_properties: Mostly a passthrough input dict for
            tfx.components.Trainer.executor. Its custom_config entry is
            parsed with json_utils.loads; custom_config.ai_platform_training_args
            and custom_config.ai_platform_training_job_id are consumed by this
            class. For the full set of parameters supported by Google Cloud AI
            Platform, refer to
            https://cloud.google.com/ml-engine/docs/tensorflow/training-jobs#configuring_the_job

        Returns:
          Whatever runner.start_aip_training returns.

        Raises:
          ValueError: if custom_config is present but is not a dict, or if
            ai_platform_training_args is not in custom_config (this includes
            a missing or null custom_config).
          RuntimeError: if the Google Cloud AI Platform training job failed
            (not raised here; presumably propagated from the runner).
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = json_utils.loads(
            exec_properties.get(standard_component_specs.CUSTOM_CONFIG_KEY,
                                'null'))
        if custom_config is None:
            # A missing or null custom_config is treated as empty, so the
            # lookup below reports the missing training args with the
            # documented ValueError instead of an AttributeError on None.
            custom_config = {}
        elif not isinstance(custom_config, Dict):
            raise ValueError(
                'custom_config in execution properties needs to be a '
                'dict.')

        training_inputs = custom_config.get(TRAINING_ARGS_KEY)
        if training_inputs is None:
            err_msg = '\'%s\' not found in custom_config.' % TRAINING_ARGS_KEY
            absl.logging.error(err_msg)
            raise ValueError(err_msg)

        job_id = custom_config.get(JOB_ID_KEY)
        enable_ucaip = custom_config.get(ENABLE_UCAIP_KEY, False)
        ucaip_region = custom_config.get(UCAIP_REGION_KEY)

        executor_class = self._GetExecutorClass()
        executor_class_path = '%s.%s' % (executor_class.__module__,
                                         executor_class.__name__)
        # exec_properties is forwarded unchanged, so its custom_config entry
        # is still the serialized value that was parsed above.
        return runner.start_aip_training(input_dict, output_dict,
                                         exec_properties, executor_class_path,
                                         training_inputs, job_id, enable_ucaip,
                                         ucaip_region)
Beispiel #10
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Launches a trainer job on Google Cloud AI Platform.

        Args:
          input_dict: Input artifacts; logged and handed to the runner as is.
          output_dict: Output artifacts; logged and handed to the runner as is.
          exec_properties: Execution properties, forwarded to the runner as is.
            The training args and the optional job id are read from its
            'custom_config' entry. For the parameters supported by Google
            Cloud AI Platform, refer to
            https://cloud.google.com/ml-engine/docs/tensorflow/training-jobs#configuring_the_job

        Returns:
          Whatever runner.start_aip_training returns.

        Raises:
          ValueError: if the training args are not found in custom_config.
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = exec_properties.get('custom_config', {})

        # The training args are mandatory; log and fail when they are absent.
        training_inputs = custom_config.get(TRAINING_ARGS_KEY)
        if training_inputs is None:
            err_msg = '\'%s\' not found in custom_config.' % TRAINING_ARGS_KEY
            absl.logging.error(err_msg)
            raise ValueError(err_msg)

        job_id = custom_config.get(JOB_ID_KEY)

        # Fully qualified path of the executor class, passed to the runner.
        executor_class = self._GetExecutorClass()
        module_name = executor_class.__module__
        class_name = executor_class.__name__
        executor_class_path = '%s.%s' % (module_name, class_name)

        return runner.start_aip_training(
            input_dict, output_dict, exec_properties, executor_class_path,
            training_inputs, job_id)