Example #1
 def testScopedLabels(self):
     """Test for scoped_labels."""
     orig_labels = telemetry_utils.get_labels_dict()
     with telemetry_utils.scoped_labels({'foo': 'bar'}):
         self.assertDictEqual(telemetry_utils.get_labels_dict(),
                              dict({'foo': 'bar'}, **orig_labels))
         with telemetry_utils.scoped_labels({'inner': 'baz'}):
             self.assertDictEqual(
                 telemetry_utils.get_labels_dict(),
                 dict({
                     'foo': 'bar',
                     'inner': 'baz'
                 }, **orig_labels))
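The nesting behavior exercised above follows the usual pattern of a context manager that merges labels into a process-level dict and restores the previous state on exit. Below is only an illustrative sketch of that pattern, not the actual telemetry_utils implementation; the module-level variable _ACTIVE_LABELS is a name assumed here for demonstration.

import contextlib
import copy

# Hypothetical process-level store of the currently active labels.
_ACTIVE_LABELS = {}


@contextlib.contextmanager
def scoped_labels(labels):
    """Temporarily merges `labels` into the active label dict."""
    global _ACTIVE_LABELS
    previous = copy.copy(_ACTIVE_LABELS)
    _ACTIVE_LABELS.update(labels)
    try:
        yield
    finally:
        _ACTIVE_LABELS = previous  # Restore the outer scope's labels on exit.


def get_labels_dict():
    """Returns a copy of the currently active labels."""
    return dict(_ACTIVE_LABELS)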
Example #2
  def run(self, pipeline: pipeline_py.Pipeline) -> None:
    """Runs given logical pipeline locally.

    Args:
      pipeline: Logical pipeline containing pipeline args and components.
    """
    # For CLI, while creating or updating pipeline, pipeline_args are extracted
    # and hence we avoid executing the pipeline.
    if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
      return

    for component in pipeline.components:
      # TODO(b/187122662): Pass through pip dependencies as a first-class
      # component flag.
      if isinstance(component, base_component.BaseComponent):
        component._resolve_pip_dependencies(  # pylint: disable=protected-access
            pipeline.pipeline_info.pipeline_root)

    c = compiler.Compiler()
    pipeline = c.compile(pipeline)

    # Substitute the runtime parameter to be a concrete run_id
    runtime_parameter_utils.substitute_runtime_parameter(
        pipeline, {
            constants.PIPELINE_RUN_ID_PARAMETER_NAME:
                datetime.datetime.now().isoformat(),
        })

    deployment_config = runner_utils.extract_local_deployment_config(pipeline)
    connection_config = deployment_config.metadata_connection_config

    logging.info('Running pipeline:\n %s', pipeline)
    logging.info('Using deployment config:\n %s', deployment_config)
    logging.info('Using connection config:\n %s', connection_config)

    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_RUNNER: 'local'}):
      # Run each component. Note that the pipeline.components list is in
      # topological order.
      #
      # TODO(b/171319478): After IR-based execution is used, use multi-threaded
      # execution so that independent components can be run in parallel.
      for node in pipeline.nodes:
        pipeline_node = node.pipeline_node
        node_id = pipeline_node.node_info.id
        executor_spec = runner_utils.extract_executor_spec(
            deployment_config, node_id)
        custom_driver_spec = runner_utils.extract_custom_driver_spec(
            deployment_config, node_id)

        component_launcher = launcher.Launcher(
            pipeline_node=pipeline_node,
            mlmd_connection=metadata.Metadata(connection_config),
            pipeline_info=pipeline.pipeline_info,
            pipeline_runtime_spec=pipeline.runtime_spec,
            executor_spec=executor_spec,
            custom_driver_spec=custom_driver_spec)
        logging.info('Component %s is running.', node_id)
        component_launcher.launch()
        logging.info('Component %s is finished.', node_id)
Example #3
 def testDoBlessed(self, mock_runner, _):
     self._model_blessing.uri = os.path.join(self._source_data_dir,
                                             'model_validator/blessed')
     self._model_blessing.set_int_custom_property('blessed', 1)
     mock_runner.get_service_name_and_api_version.return_value = ('ml',
                                                                  'v1')
     self._executor.Do(self._input_dict, self._output_dict,
                       self._serialize_custom_config_under_test())
     executor_class_path = '%s.%s' % (self._executor.__class__.__module__,
                                      self._executor.__class__.__name__)
     with telemetry_utils.scoped_labels(
         {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
         job_labels = telemetry_utils.get_labels_dict()
     mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
         mock.ANY,
         self._model_push.uri,
         mock.ANY,
         mock.ANY,
         job_labels,
     )
     self.assertPushed()
     version = self._model_push.get_string_custom_property('pushed_version')
     self.assertEqual(
         self._model_push.get_string_custom_property('pushed_destination'),
         'projects/project_id/models/model_name/versions/{}'.format(
             version))
Example #4
  def testDoSkippedModelCreation(self, mock_runner, mock_run_model_inference,
                                 _):
    input_dict = {
        'examples': [self._examples],
        'model': [self._model],
        'model_blessing': [self._model_blessing],
    }
    output_dict = {
        'inference_result': [self._inference_result],
    }
    ai_platform_serving_args = {
        'model_name': 'model_name',
        'project_id': 'project_id'
    }
    # Create exe properties.
    exec_properties = {
        'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
        'custom_config':
            json_utils.dumps(
                {executor.SERVING_ARGS_KEY: ai_platform_serving_args}),
    }
    mock_runner.get_service_name_and_api_version.return_value = ('ml', 'v1')
    mock_runner.create_model_for_aip_prediction_if_not_exist.return_value = False

    # Run executor.
    bulk_inferrer = executor.Executor(self._context)
    bulk_inferrer.Do(input_dict, output_dict, exec_properties)

    ai_platform_prediction_model_spec = (
        model_spec_pb2.AIPlatformPredictionModelSpec(
            project_id='project_id',
            model_name='model_name',
            version_name=self._model_version))
    ai_platform_prediction_model_spec.use_serialization_config = True
    inference_endpoint = model_spec_pb2.InferenceSpecType()
    inference_endpoint.ai_platform_prediction_model_spec.CopyFrom(
        ai_platform_prediction_model_spec)
    mock_run_model_inference.assert_called_once_with(mock.ANY, mock.ANY,
                                                     mock.ANY, mock.ANY,
                                                     mock.ANY,
                                                     inference_endpoint)
    executor_class_path = '%s.%s' % (bulk_inferrer.__class__.__module__,
                                     bulk_inferrer.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.make_labels_dict()
    mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
        serving_path=path_utils.serving_model_path(self._model.uri),
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        labels=job_labels,
        api=mock.ANY,
        skip_model_endpoint_creation=True,
        set_default=False)
    mock_runner.delete_model_from_aip_if_exists.assert_called_once_with(
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        api=mock.ANY,
        delete_model_endpoint=False)
Example #5
 def testDoBlessed_Vertex(self, mock_runner):
     endpoint_uri = 'projects/project_id/locations/us-central1/endpoints/12345'
     mock_runner.deploy_model_for_aip_prediction.return_value = endpoint_uri
     self._model_blessing.uri = os.path.join(self._source_data_dir,
                                             'model_validator/blessed')
     self._model_blessing.set_int_custom_property('blessed', 1)
     self._executor.Do(self._input_dict, self._output_dict,
                       self._serialize_custom_config_under_test_vertex())
     executor_class_path = '%s.%s' % (self._executor.__class__.__module__,
                                      self._executor.__class__.__name__)
     with telemetry_utils.scoped_labels(
         {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
         job_labels = telemetry_utils.make_labels_dict()
     mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
         serving_container_image_uri=self._container_image_uri_vertex,
         model_version_name=mock.ANY,
         ai_platform_serving_args=mock.ANY,
         labels=job_labels,
         serving_path=self._model_push.uri,
         endpoint_region='us-central1',
         enable_vertex=True,
     )
     self.assertPushed()
     self.assertEqual(
         self._model_push.get_string_custom_property('pushed_destination'),
         endpoint_uri)
Example #6
    def __init__(self, context: Optional[Context] = None):
        """Constructs a beam based executor."""
        super().__init__(context)

        self._beam_pipeline_args = None
        if context:
            if isinstance(context, BaseBeamExecutor.Context):
                self._beam_pipeline_args = context.beam_pipeline_args
            else:
                raise ValueError(
                    'BaseBeamExecutor found initialized with '
                    'BaseExecutorSpec. Please use BeamExecutorSpec for '
                    'Beam Components instead.')

        if self._beam_pipeline_args:
            self._beam_pipeline_args = dependency_utils.make_beam_dependency_flags(
                self._beam_pipeline_args)
            executor_class_path = '%s.%s' % (self.__class__.__module__,
                                             self.__class__.__name__)
            # TODO(zhitaoli): Rethink how we can add labels and only normalize them
            # if the job is submitted against GCP.
            with telemetry_utils.scoped_labels(
                {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
                self._beam_pipeline_args.extend(
                    telemetry_utils.make_beam_labels_args())

            # TODO(b/174174381): Don't use beam_pipeline_args to set ABSL flags.
            flags.FLAGS(sys.argv + self._beam_pipeline_args, known_only=True)
Example #7
 def setUp(self):
     super(RunnerTest, self).setUp()
     self._output_data_dir = os.path.join(
         os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
         self._testMethodName)
     self._project_id = '12345'
     self._mock_api_client = mock.Mock()
     self._inputs = {}
     self._outputs = {}
     self._training_inputs = {
         'project': self._project_id,
     }
     self._job_id = 'my_jobid'
     # Dict format of exec_properties. custom_config needs to be serialized
     # before being passed into start_aip_training function.
     self._exec_properties = {
         'custom_config': {
             executor.TRAINING_ARGS_KEY: self._training_inputs,
         },
     }
     self._model_name = 'model_name'
     self._ai_platform_serving_args = {
         'model_name': self._model_name,
         'project_id': self._project_id,
     }
     self._executor_class_path = 'my.executor.Executor'
     with telemetry_utils.scoped_labels(
         {telemetry_utils.LABEL_TFX_EXECUTOR: self._executor_class_path}):
         self._job_labels = telemetry_utils.get_labels_dict()
Example #8
    def __init__(self, context: Optional[Context] = None):
        """Constructs a beam based executor."""
        self._context = context
        self._beam_pipeline_args = context.beam_pipeline_args if context else None

        if self._beam_pipeline_args:
            if beam:
                self._beam_pipeline_args = dependency_utils.make_beam_dependency_flags(
                    self._beam_pipeline_args)
                executor_class_path = '%s.%s' % (self.__class__.__module__,
                                                 self.__class__.__name__)
                # TODO(zhitaoli): Rethink how we can add labels and only normalize them
                # if the job is submitted against GCP.
                with telemetry_utils.scoped_labels(
                    {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
                    self._beam_pipeline_args.extend(
                        telemetry_utils.make_beam_labels_args())

                # TODO(b/174174381): Don't use beam_pipeline_args to set ABSL flags.
                flags.FLAGS(sys.argv + self._beam_pipeline_args,
                            known_only=True)
            else:
                # TODO(b/156000550): We should not specialize `Context` to embed beam
                # pipeline args. Instead, the `Context` should consist of generic
                # purpose `extra_flags` which can be interpreted differently by
                # different implementations of executors.
                absl.logging.warning(
                    'Executor context\'s beam_pipeline_args is being ignored because '
                    'Apache Beam is not installed.')
Example #9
 def testScopedLabels(self):
     """Test for scoped_labels."""
     orig_labels = telemetry_utils.make_labels_dict()
     with telemetry_utils.scoped_labels({'foo': 'bar'}):
         self.assertDictEqual(telemetry_utils.make_labels_dict(),
                              dict({'foo': 'bar'}, **orig_labels))
         with telemetry_utils.scoped_labels({
                 telemetry_utils.LABEL_TFX_EXECUTOR:
                 'custom_component.custom_executor'
         }):
             self.assertDictEqual(
                 telemetry_utils.make_labels_dict(),
                 dict(
                     {
                         'foo':
                         'bar',
                         telemetry_utils.LABEL_TFX_EXECUTOR:
                         'third_party_executor'
                     }, **orig_labels))
         with telemetry_utils.scoped_labels({
                 telemetry_utils.LABEL_TFX_EXECUTOR:
                 'tfx.components.example_gen.import_example_gen.executor.Executor'
         }):
             self.assertDictEqual(
                 telemetry_utils.make_labels_dict(),
                 dict(
                     {
                         'foo':
                             'bar',
                         telemetry_utils.LABEL_TFX_EXECUTOR:  # Label is normalized.
                             'tfx-components-example_gen-import_example_gen-executor-executor'
                     },
                     **orig_labels))
         with telemetry_utils.scoped_labels({
                 telemetry_utils.LABEL_TFX_EXECUTOR:
                 'tfx.extensions.google_cloud_big_query.example_gen.executor.Executor'
         }):
             self.assertDictEqual(
                 telemetry_utils.make_labels_dict(),
                 dict(
                     {
                         'foo':
                             'bar',
                         telemetry_utils.LABEL_TFX_EXECUTOR:  # Label is normalized.
                             'tfx-extensions-google_cloud_big_query-example_gen-executor-exec'
                     },
                     **orig_labels))
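The expected values in this test imply a normalization rule for LABEL_TFX_EXECUTOR: class paths inside the tfx package are lowercased, dots become dashes, and the value is truncated to the 63-character label-value limit, while class paths outside the tfx package collapse to 'third_party_executor'. The helper below is a rough sketch consistent with those expectations; its name and exact logic are assumptions, not the real telemetry_utils code.

_MAX_LABEL_LEN = 63  # GCP label values may be at most 63 characters long.


def _normalize_executor_label(executor_class_path):
    """Illustrative normalization matching the expectations asserted above."""
    if not executor_class_path.startswith('tfx.'):
        # Executors outside the tfx package are reported generically.
        return 'third_party_executor'
    # Lowercase, replace dots with dashes, and truncate to the label limit.
    return executor_class_path.lower().replace('.', '-')[:_MAX_LABEL_LEN]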
Example #10
  def create_training_job(self, input_dict: Dict[str, List[types.Artifact]],
                          output_dict: Dict[str, List[types.Artifact]],
                          exec_properties: Dict[str, Any],
                          executor_class_path: str, job_args: Dict[str, Any],
                          job_id: Optional[str]) -> Dict[str, Any]:
    """Get training args for runner._launch_aip_training.

    The training args contain the inputs/outputs/exec_properties to the
    tfx.scripts.run_executor module.

    Args:
      input_dict: Passthrough input dict for tfx.components.Trainer.executor.
      output_dict: Passthrough input dict for tfx.components.Trainer.executor.
      exec_properties: Passthrough input dict for
        tfx.components.Trainer.executor.
      executor_class_path: class path for TFX core default trainer.
      job_args: Training input argument for AI Platform training job.
        'pythonModule', 'pythonVersion' and 'runtimeVersion' will be inferred.
        For the full set of parameters, refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput
      job_id: Job ID for AI Platform Training job. If not supplied,
        system-determined unique ID is given. Refer to
      https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#resource-job

    Returns:
      A dict containing the training arguments
    """
    training_inputs = job_args.copy()

    container_command = self.generate_container_command(input_dict, output_dict,
                                                        exec_properties,
                                                        executor_class_path)

    if not training_inputs.get('masterConfig'):
      training_inputs['masterConfig'] = {
          'imageUri': _TFX_IMAGE,
      }

    # Always use our own entrypoint instead of relying on container default.
    if 'containerCommand' in training_inputs['masterConfig']:
      logging.warning('Overriding custom value of containerCommand')
    training_inputs['masterConfig']['containerCommand'] = container_command

    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.make_labels_dict()

    # 'tfx_YYYYmmddHHMMSS' is the default job ID if not explicitly specified.
    job_id = job_id or 'tfx_{}'.format(
        datetime.datetime.now().strftime('%Y%m%d%H%M%S'))

    caip_job = {
        'job_id': job_id,
        'training_input': training_inputs,
        'labels': job_labels
    }

    return caip_job
Example #11
def main():
    # Log to the container's stdout so it can be streamed by the orchestrator.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_name', type=str, required=True)
    parser.add_argument('--pipeline_root', type=str, required=True)
    parser.add_argument('--run_id', type=str, required=True)
    parser.add_argument('--metadata_config', type=str, required=True)
    parser.add_argument('--beam_pipeline_args', type=str, required=True)
    parser.add_argument('--additional_pipeline_args', type=str, required=True)
    parser.add_argument('--component_launcher_class_path',
                        type=str,
                        required=True)
    parser.add_argument('--enable_cache', action='store_true')
    parser.add_argument('--serialized_component', type=str, required=True)
    parser.add_argument('--component_config', type=str, required=True)

    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)
    component_launcher_class = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(component_launcher_class,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not a subclass of base_component_launcher.BaseComponentLauncher'
            % component_launcher_class)

    metadata_config = metadata_store_pb2.ConnectionConfig()
    json_format.Parse(args.metadata_config, metadata_config)
    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)
    beam_pipeline_args = json.loads(args.beam_pipeline_args)
    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    launcher = component_launcher_class.create(
        component=component,
        pipeline_info=data_types.PipelineInfo(
            pipeline_name=args.pipeline_name,
            pipeline_root=args.pipeline_root,
            run_id=args.run_id,
        ),
        driver_args=driver_args,
        metadata_connection=metadata.Metadata(
            connection_config=metadata_config),
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    # Attach necessary labels to distinguish different runner and DSL.
    with telemetry_utils.scoped_labels({
            telemetry_utils.LABEL_TFX_RUNNER:
            'kubernetes',
    }):
        launcher.launch()
Example #12
    def run(
        self,
        component: base_node.BaseNode,
        enable_cache: bool = True,
        beam_pipeline_args: Optional[List[Text]] = None
    ) -> execution_result.ExecutionResult:
        """Run a given TFX component in the interactive context.

    Args:
      component: Component instance to be run.
      enable_cache: whether caching logic should be enabled in the driver.
      beam_pipeline_args: Optional Beam pipeline args for beam jobs within
        executor. Executor will use beam DirectRunner as Default. If provided,
        will override beam_pipeline_args specified in constructor.

    Returns:
      execution_result.ExecutionResult object.
    """
        run_id = datetime.datetime.now().isoformat()
        pipeline_info = data_types.PipelineInfo(
            pipeline_name=self.pipeline_name,
            pipeline_root=self.pipeline_root,
            run_id=run_id)
        driver_args = data_types.DriverArgs(enable_cache=enable_cache,
                                            interactive_resolution=True)
        metadata_connection = metadata.Metadata(
            self.metadata_connection_config)
        beam_pipeline_args = beam_pipeline_args or self.beam_pipeline_args
        additional_pipeline_args = {}
        for name, output in component.outputs.items():
            for artifact in output.get():
                artifact.pipeline_name = self.pipeline_name
                artifact.producer_component = component.id
                artifact.name = name
        # Special treatment for pip dependencies.
        # TODO(b/187122662): Pass through pip dependencies as a first-class
        # component flag.
        if isinstance(component, base_component.BaseComponent):
            component._resolve_pip_dependencies(self.pipeline_root)  # pylint: disable=protected-access
        # TODO(hongyes): figure out how to resolve launcher class in the interactive
        # context.
        launcher = in_process_component_launcher.InProcessComponentLauncher.create(
            component, pipeline_info, driver_args, metadata_connection,
            beam_pipeline_args, additional_pipeline_args)
        try:
            import colab  # pytype: disable=import-error # pylint: disable=g-import-not-at-top, unused-import, unused-variable
            runner_label = 'interactivecontext-colab'
        except ImportError:
            runner_label = 'interactivecontext'
        with telemetry_utils.scoped_labels({
                telemetry_utils.LABEL_TFX_RUNNER:
                runner_label,
        }):
            execution_id = launcher.launch().execution_id

        return execution_result.ExecutionResult(component=component,
                                                execution_id=execution_id)
Example #13
    def run(self,
            pipeline: tfx_pipeline.Pipeline,
            parameter_values: Optional[Dict[Text, Any]] = None,
            write_out: Optional[bool] = True) -> Dict[Text, Any]:
        """Compiles a pipeline DSL object into pipeline file.

    Args:
      pipeline: TFX pipeline object.
      parameter_values: mapping from runtime parameter names to its values.
      write_out: set to True to actually write out the file to the place
        designated by output_dir and output_filename. Otherwise return the
        JSON-serialized pipeline job spec.

    Returns:
      Returns the JSON pipeline job spec.

    Raises:
      RuntimeError: if trying to write out to a place occupied by an existing
      file.
    """
        # TODO(b/166343606): Support user-provided labels.
        # TODO(b/169095387): Deprecate .run() method in favor of the unified API
        # client.
        display_name = (self._config.display_name
                        or pipeline.pipeline_info.pipeline_name)
        pipeline_spec = pipeline_builder.PipelineBuilder(
            tfx_pipeline=pipeline,
            default_image=self._config.default_image,
            default_commands=self._config.default_commands).build()
        pipeline_spec.sdk_version = 'tfx-{}'.format(version.__version__)
        pipeline_spec.schema_version = _SCHEMA_VERSION
        runtime_config = pipeline_builder.RuntimeConfigBuilder(
            pipeline_info=pipeline.pipeline_info,
            parameter_values=parameter_values).build()
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_RUNNER: 'kubeflow_v2'}):
            result = pipeline_spec_pb2.PipelineJob(
                display_name=display_name
                or pipeline.pipeline_info.pipeline_name,
                labels=telemetry_utils.get_labels_dict(),
                runtime_config=runtime_config)
        result.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))
        pipeline_json_dict = json_format.MessageToDict(result)
        if write_out:
            if fileio.exists(
                    self._output_dir) and not fileio.isdir(self._output_dir):
                raise RuntimeError('Output path: %s points to a file.' %
                                   self._output_dir)
            if not fileio.exists(self._output_dir):
                fileio.makedirs(self._output_dir)

            with fileio.open(
                    os.path.join(self._output_dir, self._output_filename),
                    'wb') as f:
                f.write(json.dumps(pipeline_json_dict, sort_keys=True))

        return pipeline_json_dict
Example #14
    def run(self, tfx_pipeline: pipeline.Pipeline) -> None:
        """Deploys given logical pipeline on Beam.

    Args:
      tfx_pipeline: Logical pipeline containing pipeline args and components.
    """
        # For CLI, while creating or updating pipeline, pipeline_args are extracted
        # and hence we avoid executing the pipeline.
        if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
            return

        tfx_pipeline.pipeline_info.run_id = datetime.datetime.now().isoformat()

        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_RUNNER: 'beam'}):
            with beam.Pipeline(argv=self._beam_orchestrator_args) as p:
                # Used for triggering the component DoFns.
                root = p | 'CreateRoot' >> beam.Create([None])

                # Stores mapping of component to its signal.
                signal_map = {}
                # pipeline.components are in topological order.
                for component in tfx_pipeline.components:
                    # TODO(b/187122662): Pass through pip dependencies as a first-class
                    # component flag.
                    if isinstance(component, base_component.BaseComponent):
                        component._resolve_pip_dependencies(  # pylint: disable=protected-access
                            tfx_pipeline.pipeline_info.pipeline_root)
                    component_id = component.id

                    # Signals from upstream components.
                    signals_to_wait = []
                    if component.upstream_nodes:
                        for upstream_node in component.upstream_nodes:
                            assert upstream_node in signal_map, (
                                'Components are not in '
                                'topological order')
                            signals_to_wait.append(signal_map[upstream_node])
                    absl.logging.info(
                        'Component %s depends on %s.', component_id,
                        [s.producer.full_label for s in signals_to_wait])

                    (component_launcher_class, component_config
                     ) = config_utils.find_component_launch_info(
                         self._config, component)

                    # Each signal is an empty PCollection. AsIter ensures component will
                    # be triggered after upstream components are finished.
                    signal_map[component] = (
                        root
                        | 'Run[%s]' % component_id >> beam.ParDo(
                            _ComponentAsDoFn(component,
                                             component_launcher_class,
                                             component_config, tfx_pipeline), *
                            [beam.pvalue.AsIter(s) for s in signals_to_wait]))
                    absl.logging.info('Component %s is scheduled.',
                                      component_id)
Example #15
    def _assertDeployModelMockCalls(self,
                                    expected_models_create_body=None,
                                    expected_versions_create_body=None,
                                    expect_set_default=True):
        if not expected_models_create_body:
            expected_models_create_body = {
                'name': self._model_name,
                'regions': [],
            }

        if not expected_versions_create_body:
            with telemetry_utils.scoped_labels({
                    telemetry_utils.LABEL_TFX_EXECUTOR:
                    self._executor_class_path
            }):
                labels = telemetry_utils.get_labels_dict()

            expected_versions_create_body = {
                'name':
                self._model_version,
                'deployment_uri':
                self._serving_path,
                'runtime_version':
                runner._get_tf_runtime_version(tf.__version__),
                'python_version':
                runner._get_caip_python_version(
                    runner._get_tf_runtime_version(tf.__version__)),
                'labels':
                labels
            }

        self._mock_models_create.assert_called_with(
            body=mock.ANY,
            parent='projects/{}'.format(self._project_id),
        )
        (_, models_create_kwargs) = self._mock_models_create.call_args
        self.assertDictEqual(expected_models_create_body,
                             models_create_kwargs['body'])

        self._mock_versions_create.assert_called_with(
            body=mock.ANY,
            parent='projects/{}/models/{}'.format(self._project_id,
                                                  self._model_name))
        (_, versions_create_kwargs) = self._mock_versions_create.call_args

        self.assertDictEqual(expected_versions_create_body,
                             versions_create_kwargs['body'])

        if not expect_set_default:
            return

        self._mock_set_default.assert_called_with(
            name='projects/{}/models/{}/versions/{}'.format(
                self._project_id, self._model_name, self._model_version))
        self._mock_set_default_execute.assert_called_with()
Example #16
  def run(self, pipeline: pipeline_pb2.Pipeline) -> None:
    """Deploys given logical pipeline on Beam.

    Args:
      pipeline: Logical pipeline in IR format.
    """
    # For CLI, while creating or updating pipeline, pipeline_args are extracted
    # and hence we avoid deploying the pipeline.
    if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
      return

    # TODO(b/163003901): Support beam DAG runner args through IR.
    # TODO(b/163003901): MLMD connection config should be passed in via IR.
    connection_config = metadata_store_pb2.ConnectionConfig()
    connection_config.sqlite.SetInParent()
    mlmd_connection = metadata.Metadata(
        connection_config=connection_config)

    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_RUNNER: 'beam'}):
      with beam.Pipeline() as p:
        # Used for triggering the component DoFns.
        root = p | 'CreateRoot' >> beam.Create([None])

        # Stores mapping of component to its signal.
        signal_map = {}
        # pipeline.components are in topological order.
        for node in pipeline.nodes:
          # TODO(b/160882349): Support subpipeline
          pipeline_node = node.pipeline_node
          component_id = pipeline_node.node_info.id

          # Signals from upstream components.
          signals_to_wait = []
          for upstream_node in pipeline_node.upstream_nodes:
            assert upstream_node in signal_map, ('Components are not in '
                                                 'topological order')
            signals_to_wait.append(signal_map[upstream_node])
          logging.info('Component %s depends on %s.', component_id,
                       [s.producer.full_label for s in signals_to_wait])

          # Each signal is an empty PCollection. AsIter ensures component will
          # be triggered after upstream components are finished.
          # LINT.IfChange
          signal_map[component_id] = (
              root
              | 'Run[%s]' % component_id >> beam.ParDo(
                  _PipelineNodeAsDoFn(pipeline_node, mlmd_connection,
                                      pipeline.pipeline_info,
                                      pipeline.runtime_spec), *
                  [beam.pvalue.AsIter(s) for s in signals_to_wait]))
          # LINT.ThenChange(../beam/beam_dag_runner.py)
          logging.info('Component %s is scheduled.', component_id)
Example #17
  def __init__(self, context: Optional[Context] = None):
    """Constructs a beam based executor."""
    self._context = context
    self._beam_pipeline_args = context.beam_pipeline_args if context else None

    if self._beam_pipeline_args:
      self._beam_pipeline_args = dependency_utils.make_beam_dependency_flags(
          self._beam_pipeline_args)
      executor_class_path = '%s.%s' % (self.__class__.__module__,
                                       self.__class__.__name__)
      # TODO(zhitaoli): Rethink how we can add labels and only normalize them
      # if the job is submitted against GCP.
      with telemetry_utils.scoped_labels(
          {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
        self._beam_pipeline_args.extend(telemetry_utils.make_beam_labels_args())
Example #18
def _airflow_component_launcher(
        component: base_node.BaseNode, component_launcher_class: Type[
            base_component_launcher.BaseComponentLauncher],
        pipeline_info: data_types.PipelineInfo,
        driver_args: data_types.DriverArgs,
        metadata_connection_config: metadata_store_pb2.ConnectionConfig,
        beam_pipeline_args: List[Text], additional_pipeline_args: Dict[Text,
                                                                       Any],
        component_config: base_component_config.BaseComponentConfig,
        exec_properties: Dict[Text, Any], **kwargs) -> None:
    """Helper function to launch TFX component execution.

  This helper function will be called with Airflow env objects which contain
  run_id that we need to pass into TFX ComponentLauncher.

  Args:
    component: TFX BaseComponent instance. This instance holds all inputs and
      outputs placeholders as well as component properties.
    component_launcher_class: The class of the launcher to launch the component.
    pipeline_info: A data_types.PipelineInfo instance that holds pipeline
      properties
    driver_args: Component specific args for driver.
    metadata_connection_config: Configuration for how to connect to metadata.
    beam_pipeline_args: Pipeline arguments for Beam powered Components.
    additional_pipeline_args: A dict of additional pipeline args.
    component_config: Component config to launch the component.
    exec_properties: Execution properties from the ComponentSpec.
    **kwargs: Context arguments that will be passed in by Airflow, including:
      - ti: TaskInstance object from which we can get run_id of the running
        pipeline.
      For more details, please refer to the code:
      https://github.com/apache/airflow/blob/master/airflow/operators/python_operator.py
  """
    component.exec_properties.update(exec_properties)

    # Populate run id from Airflow task instance.
    pipeline_info.run_id = kwargs['ti'].get_dagrun().run_id
    launcher = component_launcher_class.create(
        component=component,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection=metadata.Metadata(metadata_connection_config),
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_RUNNER: 'airflow'}):
        launcher.launch()
Example #19
    def testDeployModelForAIPPredictionWithCustomRegion(self, mock_discovery):
        mock_discovery.build.return_value = self._mock_api_client
        self._setUpPredictionMocks()

        self._ai_platform_serving_args['regions'] = ['custom-region']
        runner.deploy_model_for_aip_prediction(self._serving_path,
                                               self._model_version,
                                               self._ai_platform_serving_args,
                                               self._executor_class_path)

        self._mock_models_create.assert_called_with(
            body=mock.ANY,
            parent='projects/{}'.format(self._project_id),
        )
        (_, models_create_kwargs) = self._mock_models_create.call_args
        models_create_body = models_create_kwargs['body']
        self.assertDictEqual(
            {
                'name': 'model_name',
                'regions': ['custom-region']
            }, models_create_body)

        self._mock_versions_create.assert_called_with(
            body=mock.ANY,
            parent='projects/{}/models/{}'.format(self._project_id,
                                                  'model_name'))
        (_, versions_create_kwargs) = self._mock_versions_create.call_args
        versions_create_body = versions_create_kwargs['body']
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: self._executor_class_path}):
            labels = telemetry_utils.get_labels_dict()
        runtime_version = runner._get_tf_runtime_version(tf.__version__)
        self.assertDictEqual(
            {
                'name': self._model_version,
                'deployment_uri': self._serving_path,
                'runtime_version': runtime_version,
                'python_version':
                runner._get_caip_python_version(runtime_version),
                'labels': labels,
            }, versions_create_body)
        self._mock_get.assert_called_with(name='op_name')

        self._mock_set_default.assert_called_with(
            name='projects/{}/models/{}/versions/{}'.format(
                self._project_id, 'model_name', self._model_version))
        self._mock_set_default_execute.assert_called_with()
Example #20
  def run(self, tfx_pipeline: pipeline.Pipeline) -> None:
    """Runs given logical pipeline locally.

    Args:
      tfx_pipeline: Logical pipeline containing pipeline args and components.
    """
    # For CLI, while creating or updating pipeline, pipeline_args are extracted
    # and hence we avoid executing the pipeline.
    if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
      return

    tfx_pipeline.pipeline_info.run_id = datetime.datetime.now().isoformat()

    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_RUNNER: 'local'}):
      # Run each component. Note that the pipeline.components list is in
      # topological order.
      #
      # TODO(b/171319478): After IR-based execution is used, use multi-threaded
      # execution so that independent components can be run in parallel.
      for component in tfx_pipeline.components:
        # TODO(b/187122662): Pass through pip dependencies as a first-class
        # component flag.
        if isinstance(component, base_component.BaseComponent):
          component._resolve_pip_dependencies(  # pylint: disable=protected-access
              tfx_pipeline.pipeline_info.pipeline_root)
        (component_launcher_class, component_config) = (
            config_utils.find_component_launch_info(self._config, component))
        driver_args = data_types.DriverArgs(
            enable_cache=tfx_pipeline.enable_cache)
        metadata_connection = metadata.Metadata(
            tfx_pipeline.metadata_connection_config)
        node_launcher = component_launcher_class.create(
            component=component,
            pipeline_info=tfx_pipeline.pipeline_info,
            driver_args=driver_args,
            metadata_connection=metadata_connection,
            beam_pipeline_args=tfx_pipeline.beam_pipeline_args,
            additional_pipeline_args=tfx_pipeline.additional_pipeline_args,
            component_config=component_config)
        logging.info('Component %s is running.', component.id)
        node_launcher.launch()
        logging.info('Component %s is finished.', component.id)
Example #21
    def testMakeBeamLabelsArgs(self):
        """Test for make_beam_labels_args."""
        beam_pipeline_args = telemetry_utils.make_beam_labels_args()
        expected_beam_pipeline_args = [
            '--labels',
            'tfx_py_version=%d-%d' %
            (sys.version_info.major, sys.version_info.minor),
            '--labels',
            'tfx_version=%s' % version.__version__.replace('.', '-'),
        ]
        self.assertListEqual(expected_beam_pipeline_args, beam_pipeline_args)

        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: 'TestExecutor'}):
            beam_pipeline_args = telemetry_utils.make_beam_labels_args()
            expected_beam_pipeline_args = [
                '--labels',
                'tfx_executor=third_party_executor',
            ] + expected_beam_pipeline_args
            self.assertListEqual(expected_beam_pipeline_args,
                                 beam_pipeline_args)
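Consistent with the assertions above, make_beam_labels_args appears to flatten the currently active label dict into repeated '--labels key=value' Beam pipeline arguments. The sketch below illustrates that shape only; the ordering and normalization performed by the real helper may differ.

def make_beam_labels_args_sketch(labels):
    """Flattens a label dict into repeated '--labels key=value' Beam args."""
    beam_args = []
    for key, value in sorted(labels.items()):
        beam_args.extend(['--labels', '{}={}'.format(key, value)])
    return beam_args


# Example usage with an assumed label dict:
# make_beam_labels_args_sketch({'tfx_version': '1-0-0'})
# -> ['--labels', 'tfx_version=1-0-0']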
Example #22
    def testDoBlessedOnRegionalEndpoint(self, mock_runner, _):
        self._exec_properties = {
            'custom_config': {
                constants.SERVING_ARGS_KEY: {
                    'model_name': 'model_name',
                    'project_id': 'project_id'
                },
                constants.ENDPOINT_ARGS_KEY:
                'https://ml-us-west1.googleapis.com',
            },
        }
        self._model_blessing.uri = os.path.join(self._source_data_dir,
                                                'model_validator/blessed')
        self._model_blessing.set_int_custom_property('blessed', 1)
        mock_runner.get_service_name_and_api_version.return_value = ('ml',
                                                                     'v1')
        version = self._model_push.get_string_custom_property('pushed_version')
        mock_runner.deploy_model_for_aip_prediction.return_value = (
            'projects/project_id/models/model_name/versions/{}'.format(version)
        )

        self._executor.Do(self._input_dict, self._output_dict,
                          self._serialize_custom_config_under_test())
        executor_class_path = '%s.%s' % (self._executor.__class__.__module__,
                                         self._executor.__class__.__name__)
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
            job_labels = telemetry_utils.make_labels_dict()
        mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
            serving_path=self._model_push.uri,
            model_version_name=mock.ANY,
            ai_platform_serving_args=mock.ANY,
            api=mock.ANY,
            labels=job_labels,
        )
        self.assertPushed()
        self.assertEqual(
            self._model_push.get_string_custom_property('pushed_destination'),
            'projects/project_id/models/model_name/versions/{}'.format(
                version))
Example #23
 def testDoBlessedOnRegionalEndpoint_Vertex(self, mock_runner):
     endpoint_uri = 'projects/project_id/locations/us-west1/endpoints/12345'
     mock_runner.deploy_model_for_aip_prediction.return_value = endpoint_uri
     self._exec_properties_vertex = {
         'custom_config': {
             constants.SERVING_ARGS_KEY: {
                 'model_name': 'model_name',
                 'project_id': 'project_id'
             },
             constants.VERTEX_CONTAINER_IMAGE_URI_KEY:
             self._container_image_uri_vertex,
             constants.ENABLE_VERTEX_KEY: True,
             constants.VERTEX_REGION_KEY: 'us-west1',
         },
     }
     self._model_blessing.uri = os.path.join(self._source_data_dir,
                                             'model_validator/blessed')
     self._model_blessing.set_int_custom_property('blessed', 1)
     self._executor.Do(self._input_dict, self._output_dict,
                       self._serialize_custom_config_under_test_vertex())
     executor_class_path = '%s.%s' % (self._executor.__class__.__module__,
                                      self._executor.__class__.__name__)
     with telemetry_utils.scoped_labels(
         {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
         job_labels = telemetry_utils.make_labels_dict()
     mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
         serving_path=self._model_push.uri,
         model_version_name=mock.ANY,
         ai_platform_serving_args=mock.ANY,
         labels=job_labels,
         serving_container_image_uri=self._container_image_uri_vertex,
         endpoint_region='us-west1',
         enable_vertex=True,
     )
     self.assertPushed()
     self.assertEqual(
         self._model_push.get_string_custom_property('pushed_destination'),
         endpoint_uri)
Example #24
    def testDeployModelForAIPPredictionWithCustomRuntime(self, mock_discovery):
        mock_discovery.build.return_value = self._mock_api_client
        self._setUpPredictionMocks()

        self._ai_platform_serving_args['runtime_version'] = '1.23.45'
        runner.deploy_model_for_aip_prediction(self._serving_path,
                                               self._model_version,
                                               self._ai_platform_serving_args,
                                               self._executor_class_path)

        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: self._executor_class_path}):
            labels = telemetry_utils.get_labels_dict()

        expected_versions_create_body = {
            'name': self._model_version,
            'deployment_uri': self._serving_path,
            'runtime_version': '1.23.45',
            'python_version': runner._get_caip_python_version('1.23.45'),
            'labels': labels,
        }
        self._assertDeployModelMockCalls(
            expected_versions_create_body=expected_versions_create_body)
Example #25
    def run(self, pipeline: pipeline_pb2.Pipeline) -> None:
        """Deploys given logical pipeline on Beam.

    Args:
      pipeline: Logical pipeline in IR format.
    """
        # For CLI, while creating or updating pipeline, pipeline_args are extracted
        # and hence we avoid deploying the pipeline.
        if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
            return

        run_id = datetime.datetime.now().isoformat()
        # Substitute the runtime parameter to be a concrete run_id
        runtime_parameter_utils.substitute_runtime_parameter(
            pipeline, {
                constants.PIPELINE_RUN_ID_PARAMETER_NAME: run_id,
            })

        # TODO(b/163003901): Support beam DAG runner args through IR.
        deployment_config = self._extract_deployment_config(pipeline)
        connection_config = deployment_config.metadata_connection_config
        mlmd_connection = metadata.Metadata(
            connection_config=connection_config)

        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_RUNNER: 'beam'}):
            with beam.Pipeline() as p:
                # Used for triggering the component DoFns.
                root = p | 'CreateRoot' >> beam.Create([None])

                # Stores mapping of component to its signal.
                signal_map = {}
                # pipeline.components are in topological order.
                for node in pipeline.nodes:
                    # TODO(b/160882349): Support subpipeline
                    pipeline_node = node.pipeline_node
                    component_id = pipeline_node.node_info.id
                    executor_spec = self._extract_executor_spec(
                        deployment_config, component_id)
                    custom_driver_spec = self._extract_custom_driver_spec(
                        deployment_config, component_id)

                    # Signals from upstream components.
                    signals_to_wait = []
                    for upstream_node in pipeline_node.upstream_nodes:
                        assert upstream_node in signal_map, (
                            'Components are not in '
                            'topological order')
                        signals_to_wait.append(signal_map[upstream_node])
                    logging.info(
                        'Component %s depends on %s.', component_id,
                        [s.producer.full_label for s in signals_to_wait])

                    # Each signal is an empty PCollection. AsIter ensures component will
                    # be triggered after upstream components are finished.
                    # LINT.IfChange
                    signal_map[component_id] = (
                        root
                        | 'Run[%s]' % component_id >> beam.ParDo(
                            self._PIPELINE_NODE_DO_FN_CLS(
                                pipeline_node=pipeline_node,
                                mlmd_connection=mlmd_connection,
                                pipeline_info=pipeline.pipeline_info,
                                pipeline_runtime_spec=pipeline.runtime_spec,
                                executor_spec=executor_spec,
                                custom_driver_spec=custom_driver_spec), *
                            [beam.pvalue.AsIter(s) for s in signals_to_wait]))
                    # LINT.ThenChange(../beam/beam_dag_runner.py)
                    logging.info('Component %s is scheduled.', component_id)
Example #26
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]):
    """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.bigquery_serving_args is
        consumed by this class.  For the full set of parameters supported by
        Big Query ML, refer to https://cloud.google.com/bigquery-ml/

    Returns:
      None
    Raises:
      ValueError:
        If bigquery_serving_args is not in exec_properties.custom_config.
        If pipeline_root is not 'gs://...'
      RuntimeError: if the Big Query job failed.
    """
    self._log_startup(input_dict, output_dict, exec_properties)
    model_push = artifact_utils.get_single_instance(
        output_dict[tfx_pusher_executor.PUSHED_MODEL_KEY])
    if not self.CheckBlessing(input_dict):
      self._MarkNotPushed(model_push)
      return

    model_export = artifact_utils.get_single_instance(
        input_dict[tfx_pusher_executor.MODEL_KEY])
    model_export_uri = model_export.uri

    custom_config = json_utils.loads(
        exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
    if custom_config is not None and not isinstance(custom_config, Dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict.')

    bigquery_serving_args = custom_config.get(SERVING_ARGS_KEY)
    # If the configuration is missing, error out.
    if bigquery_serving_args is None:
      raise ValueError('Big Query ML configuration was not provided')

    bq_model_uri = '.'.join([
        bigquery_serving_args[_PROJECT_ID_KEY],
        bigquery_serving_args[_BQ_DATASET_ID_KEY],
        bigquery_serving_args[_MODEL_NAME_KEY],
    ])

    # Deploy the model.
    io_utils.copy_dir(
        src=path_utils.serving_model_path(model_export_uri), dst=model_push.uri)
    model_path = model_push.uri
    if not model_path.startswith(_GCS_PREFIX):
      raise ValueError('pipeline_root must be gs:// for BigQuery ML Pusher.')

    logging.info('Deploying the model to BigQuery ML for serving: %s from %s',
                 bigquery_serving_args, model_path)

    query = _BQML_CREATE_OR_REPLACE_MODEL_QUERY_TEMPLATE.format(
        model_uri=bq_model_uri, model_path=model_path)

    # TODO(zhitaoli): Refactor the executor_class_path creation into a common
    # utility function.
    executor_class_path = '%s.%s' % (self.__class__.__module__,
                                     self.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      default_query_job_config = bigquery.job.QueryJobConfig(
          labels=telemetry_utils.get_labels_dict())
    client = bigquery.Client(default_query_job_config=default_query_job_config)

    try:
      query_job = client.query(query)
      query_job.result()  # Waits for the query to finish
    except Exception as e:
      raise RuntimeError('BigQuery ML Push failed: {}'.format(e))

    logging.info('Successfully deployed model %s serving from %s', bq_model_uri,
                 model_path)

    # Setting the push_destination to bigquery uri
    self._MarkPushed(model_push, pushed_destination=bq_model_uri)
Example #27
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs batch inference on a given model with given input examples.

    This function creates a new model (if necessary) and a new model version
    before inference, and cleans up resources after inference. It provides
    re-executability as it cleans up (only) the model resources that are created
    during the process, even if the inference job failed.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.
        - custom_config: custom_config.ai_platform_serving_args need to contain
          the serving job parameters sent to Google Cloud AI Platform. For the
          full set of parameters, refer to
          https://cloud.google.com/ml-engine/reference/rest/v1/projects.models

    Returns:
      None
    """
    self._log_startup(input_dict, output_dict, exec_properties)
    if 'examples' not in input_dict:
      raise ValueError('\'examples\' is missing in input dict.')
    if 'inference_result' not in output_dict:
      raise ValueError('\'inference_result\' is missing in output dict.')
    output = artifact_utils.get_single_instance(output_dict['inference_result'])
    if 'model' not in input_dict:
      raise ValueError('Input models are not valid, model '
                       'needs to be specified.')
    if 'model_blessing' in input_dict:
      model_blessing = artifact_utils.get_single_instance(
          input_dict['model_blessing'])
      if not model_utils.is_model_blessed(model_blessing):
        output.set_int_custom_property('inferred', 0)
        logging.info('Model on %s was not blessed', model_blessing.uri)
        return
    else:
      logging.info('Model blessing is not provided, exported model will be '
                   'used.')
    if _CUSTOM_CONFIG_KEY not in exec_properties:
      raise ValueError('Input exec properties are not valid, {} '
                       'needs to be specified.'.format(_CUSTOM_CONFIG_KEY))

    custom_config = json_utils.loads(
        exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
    if custom_config is not None and not isinstance(custom_config, Dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict.')
    ai_platform_serving_args = custom_config.get(SERVING_ARGS_KEY)
    if not ai_platform_serving_args:
      raise ValueError(
          '\'ai_platform_serving_args\' is missing in \'custom_config\'')
    service_name, api_version = runner.get_service_name_and_api_version(
        ai_platform_serving_args)
    executor_class_path = '%s.%s' % (self.__class__.__module__,
                                     self.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.get_labels_dict()
    model = artifact_utils.get_single_instance(input_dict['model'])
    model_path = path_utils.serving_model_path(model.uri)
    logging.info('Use exported model from %s.', model_path)
    # Use model artifact uri to generate model version to guarantee the
    # 1:1 mapping from model version to model.
    model_version = 'version_' + hashlib.sha256(model.uri.encode()).hexdigest()
    inference_spec = self._get_inference_spec(model_path, model_version,
                                              ai_platform_serving_args)
    data_spec = bulk_inferrer_pb2.DataSpec()
    json_format.Parse(exec_properties['data_spec'], data_spec)
    api = discovery.build(service_name, api_version)
    new_model_created = False
    try:
      new_model_created = runner.create_model_for_aip_prediction_if_not_exist(
          api, job_labels, ai_platform_serving_args)
      runner.deploy_model_for_aip_prediction(
          api,
          model_path,
          model_version,
          ai_platform_serving_args,
          job_labels,
          skip_model_creation=True,
          set_default_version=False,
      )
      self._run_model_inference(data_spec, input_dict['examples'], output.uri,
                                inference_spec)
    except Exception as e:
      logging.error('Error in executing CloudAIBulkInferrerComponent: %s',
                    str(e))
      output.set_int_custom_property('inferred', 0)
      raise
    finally:
      # Guarantee newly created resources are cleaned up even if the inference
      # job failed.

      # Clean up the newly deployed model.
      runner.delete_model_version_from_aip_if_exists(api, model_version,
                                                     ai_platform_serving_args)
      if new_model_created:
        runner.delete_model_from_aip_if_exists(api, ai_platform_serving_args)
    # Mark the inference as successful after resources are cleaned up.
    output.set_int_custom_property('inferred', 1)
Ejemplo n.º 28
0
def main():
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
    # the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_name', type=str, required=True)
    parser.add_argument('--pipeline_root', type=str, required=True)
    parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
    parser.add_argument('--beam_pipeline_args', type=str, required=True)
    parser.add_argument('--additional_pipeline_args', type=str, required=True)
    parser.add_argument('--component_launcher_class_path',
                        type=str,
                        required=True)
    parser.add_argument('--enable_cache', action='store_true')
    parser.add_argument('--serialized_component', type=str, required=True)
    parser.add_argument('--component_config', type=str, required=True)

    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)
    component_launcher_class = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(component_launcher_class,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % component_launcher_class)

    kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        _get_metadata_connection_config(kubeflow_metadata_config))
    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)

    beam_pipeline_args = _make_beam_pipeline_args(args.beam_pipeline_args)

    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    launcher = component_launcher_class.create(
        component=component,
        pipeline_info=data_types.PipelineInfo(
            pipeline_name=args.pipeline_name,
            pipeline_root=args.pipeline_root,
            run_id=os.environ['WORKFLOW_ID']),
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    # Attach necessary labels to distinguish different runner and DSL.
    # TODO(zhitaoli): Pass this from KFP runner side when the same container
    # entrypoint can be used by a different runner.
    with telemetry_utils.scoped_labels({
            telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
    }):
        execution_info = launcher.launch()

    # Dump the UI metadata.
    _dump_ui_metadata(component, execution_info)
Ejemplo n.º 29
0
  def run(self, pipeline: Union[pipeline_pb2.Pipeline,
                                pipeline_py.Pipeline]) -> None:
    """Deploys given logical pipeline on Beam.

    Args:
      pipeline: Logical pipeline in IR format.
    """
    # For CLI, while creating or updating pipeline, pipeline_args are extracted
    # and hence we avoid deploying the pipeline.
    if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
      return

    if isinstance(pipeline, pipeline_py.Pipeline):
      c = compiler.Compiler()
      pipeline = c.compile(pipeline)

    run_id = datetime.datetime.now().strftime('%Y%m%d-%H%M%S.%f')
    # Substitute the runtime parameter to be a concrete run_id
    runtime_parameter_utils.substitute_runtime_parameter(
        pipeline, {
            constants.PIPELINE_RUN_ID_PARAMETER_NAME: run_id,
        })

    deployment_config = self._extract_deployment_config(pipeline)
    connection_config = self._connection_config_from_deployment_config(
        deployment_config)

    logging.info('Running pipeline:\n %s', pipeline)
    logging.info('Using deployment config:\n %s', deployment_config)
    logging.info('Using connection config:\n %s', connection_config)

    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_RUNNER: 'beam'}):
      with beam.Pipeline() as p:
        # Used for triggering the node DoFns.
        root = p | 'CreateRoot' >> beam.Create([None])

        # Stores mapping of node to its signal.
        signal_map = {}
        # pipeline.nodes are in topological order.
        for node in pipeline.nodes:
          # TODO(b/160882349): Support subpipeline
          pipeline_node = node.pipeline_node
          node_id = pipeline_node.node_info.id
          executor_spec = self._extract_executor_spec(
              deployment_config, node_id)
          custom_driver_spec = self._extract_custom_driver_spec(
              deployment_config, node_id)

          # Signals from upstream nodes.
          signals_to_wait = []
          for upstream_node in pipeline_node.upstream_nodes:
            assert upstream_node in signal_map, ('Nodes are not in '
                                                 'topological order')
            signals_to_wait.append(signal_map[upstream_node])
          logging.info('Node %s depends on %s.', node_id,
                       [s.producer.full_label for s in signals_to_wait])

          # Each signal is an empty PCollection. AsIter ensures a node will
          # be triggered after upstream nodes are finished.
          signal_map[node_id] = (
              root
              | 'Run[%s]' % node_id >> beam.ParDo(
                  self._PIPELINE_NODE_DO_FN_CLS(
                      pipeline_node=pipeline_node,
                      mlmd_connection_config=connection_config,
                      pipeline_info=pipeline.pipeline_info,
                      pipeline_runtime_spec=pipeline.runtime_spec,
                      executor_spec=executor_spec,
                      custom_driver_spec=custom_driver_spec,
                      deployment_config=deployment_config),
                  *[beam.pvalue.AsIter(s) for s in signals_to_wait]))
          logging.info('Node %s is scheduled.', node_id)
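To see the signal mechanism above in isolation, here is a minimal, self-contained sketch (assuming only apache_beam is installed; the _Step class is illustrative and not part of TFX). Each step emits nothing, so its output PCollection is an empty signal, and passing that signal to the next ParDo via beam.pvalue.AsIter forces the downstream step to wait until the upstream one has finished:

import apache_beam as beam


class _Step(beam.DoFn):
  """Runs a named step and emits nothing, so its output is an empty signal."""

  def __init__(self, name):
    self._name = name

  def process(self, element, *signals):
    # The side inputs are consumed only to force the dependency; they are empty.
    print('running step', self._name)
    return []


with beam.Pipeline() as p:
  root = p | 'CreateRoot' >> beam.Create([None])
  signal_a = root | 'Run[a]' >> beam.ParDo(_Step('a'))
  # Step "b" starts only after "a" has finished because signal_a is a side
  # input that must be fully computed before _Step('b') can run.
  signal_b = root | 'Run[b]' >> beam.ParDo(
      _Step('b'), beam.pvalue.AsIter(signal_a))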
Ejemplo n.º 30
0
File: runner.py Project: zvrr/tfx
def start_aip_training(input_dict: Dict[Text, List[types.Artifact]],
                       output_dict: Dict[Text, List[types.Artifact]],
                       exec_properties: Dict[Text,
                                             Any], executor_class_path: Text,
                       training_inputs: Dict[Text,
                                             Any], job_id: Optional[Text]):
  """Start a trainer job on AI Platform (AIP).

  This is done by forwarding the inputs/outputs/exec_properties to the
  tfx.scripts.run_executor module running inside an AI Platform training job.

  Args:
    input_dict: Passthrough input dict for tfx.components.Trainer.executor.
    output_dict: Passthrough output dict for tfx.components.Trainer.executor.
    exec_properties: Passthrough input dict for tfx.components.Trainer.executor.
    executor_class_path: class path for TFX core default trainer.
    training_inputs: Training input argument for AI Platform training job.
      'pythonModule', 'pythonVersion' and 'runtimeVersion' will be inferred. For
      the full set of parameters, refer to
      https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput
    job_id: Job ID for the AI Platform Training job. If not supplied, a default
      of the form 'tfx_YYYYmmddHHMMSS' is generated. Refer to
      https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#resource-job

  Returns:
    None
  Raises:
    RuntimeError: if the Google Cloud AI Platform training job failed/cancelled.
  """
  training_inputs = training_inputs.copy()

  json_inputs = artifact_utils.jsonify_artifact_dict(input_dict)
  logging.info('json_inputs=\'%s\'.', json_inputs)
  json_outputs = artifact_utils.jsonify_artifact_dict(output_dict)
  logging.info('json_outputs=\'%s\'.', json_outputs)
  json_exec_properties = json.dumps(exec_properties, sort_keys=True)
  logging.info('json_exec_properties=\'%s\'.', json_exec_properties)

  # Configure AI Platform training job
  api_client = discovery.build('ml', 'v1')

  # We use custom containers to launch training on AI Platform, which invokes
  # the specified image using the container's entrypoint. The default
  # entrypoint for TFX containers is to call scripts/run_executor.py. The
  # arguments below are passed to this run_executor entry to run the executor
  # specified in `executor_class_path`.
  job_args = [
      '--executor_class_path', executor_class_path, '--inputs', json_inputs,
      '--outputs', json_outputs, '--exec-properties', json_exec_properties
  ]
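  # Illustration only (hypothetical values): with the default TFX container
  # entrypoint, the job defined above ends up invoking roughly
  #   python -m tfx.scripts.run_executor \
  #     --executor_class_path <executor_class_path> \
  #     --inputs <json_inputs> --outputs <json_outputs> \
  #     --exec-properties <json_exec_properties>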

  if not training_inputs.get('masterConfig'):
    training_inputs['masterConfig'] = {
        'imageUri': _TFX_IMAGE,
    }

  training_inputs['args'] = job_args

  # Pop project_id so AIP doesn't complain about an unexpected parameter.
  # It's been a stowaway in aip_args and has finally reached its destination.
  project = training_inputs.pop('project')
  project_id = 'projects/{}'.format(project)
  with telemetry_utils.scoped_labels(
      {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
    job_labels = telemetry_utils.get_labels_dict()

  # 'tfx_YYYYmmddHHMMSS' is the default job ID if not explicitly specified.
  job_id = job_id or 'tfx_{}'.format(
      datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
  job_spec = {
      'jobId': job_id,
      'trainingInput': training_inputs,
      'labels': job_labels,
  }

  # Submit job to AIP Training
  logging.info('Submitting job=\'%s\', project=\'%s\' to AI Platform.', job_id,
               project)
  request = api_client.projects().jobs().create(
      body=job_spec, parent=project_id)
  request.execute()

  # Wait for AIP Training job to finish
  job_name = '{}/jobs/{}'.format(project_id, job_id)
  request = api_client.projects().jobs().get(name=job_name)
  response = request.execute()
  retry_count = 0

  # Monitors the long-running operation by polling the job state periodically,
  # and retries the polling when a transient connectivity issue is encountered.
  #
  # Long-running operation monitoring:
  #   The possible states of "get job" response can be found at
  #   https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs#State
  #   where SUCCEEDED/FAILED/CANCELLED are considered to be final states.
  #   The following logic will keep polling the state of the job until the job
  #   enters a final state.
  #
  # During the polling, if a connection error was encountered, the GET request
  # will be retried by recreating the Python API client to refresh the lifecycle
  # of the connection being used. See
  # https://github.com/googleapis/google-api-python-client/issues/218
  # for a detailed description of the problem. If the error persists for
  # _CONNECTION_ERROR_RETRY_LIMIT consecutive attempts, the function will exit
  # with code 1.
  while response['state'] not in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
    time.sleep(_POLLING_INTERVAL_IN_SECONDS)
    try:
      response = request.execute()
      retry_count = 0
    # Handle transient connection error.
    except ConnectionError as err:
      if retry_count < _CONNECTION_ERROR_RETRY_LIMIT:
        retry_count += 1
        logging.warning(
            'ConnectionError (%s) encountered when polling job: %s. Trying to '
            'recreate the API client.', err, job_id)
        # Recreate the Python API client.
        api_client = discovery.build('ml', 'v1')
        request = api_client.projects().jobs().get(name=job_name)
      else:
        # TODO(b/158433873): Consider raising the error instead of exit with
        # code 1 after CMLE supports configurable retry policy.
        # Currently CMLE will automatically retry the job unless return code
        # 1-128 is returned.
        logging.error('Request failed after %s retries.',
                      _CONNECTION_ERROR_RETRY_LIMIT)
        sys.exit(1)

  if response['state'] in ('FAILED', 'CANCELLED'):
    err_msg = 'Job \'{}\' did not succeed.  Detailed response {}.'.format(
        job_name, response)
    logging.error(err_msg)
    raise RuntimeError(err_msg)

  # AIP training complete
  logging.info('Job \'%s\' successful.', job_name)
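A hedged usage sketch of the function above (every value is a placeholder, not drawn from any real pipeline); in practice the dicts come from the calling Trainer executor:

input_dict = {}       # Normally the Trainer's resolved input artifacts.
output_dict = {}      # Normally the Trainer's output artifacts.
exec_properties = {}  # Normally the Trainer's execution properties.

start_aip_training(
    input_dict=input_dict,
    output_dict=output_dict,
    exec_properties=exec_properties,
    executor_class_path='tfx.components.trainer.executor.GenericExecutor',
    training_inputs={
        'project': 'my-gcp-project',  # Popped and used as the parent resource.
        'region': 'us-central1',
        'scaleTier': 'BASIC',
    },
    job_id=None)  # None lets the function default to 'tfx_YYYYmmddHHMMSS'.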