コード例 #1
0
ファイル: compiler_test.py プロジェクト: yifanmai/tfx
  def _set_up_test_pipeline(self):
    """Builds an Iris example pipeline with slight changes."""
    pipeline_name = "iris"
    iris_root = "iris_root"
    serving_model_dir = os.path.join(iris_root, "serving_model", pipeline_name)
    tfx_root = "tfx_root"
    data_path = os.path.join(tfx_root, "data_path")
    pipeline_root = os.path.join(tfx_root, "pipelines", pipeline_name)
    self.test_pipeline_info = data_types.PipelineInfo(pipeline_name, iris_root)

    example_gen = CsvExampleGen(input=external_input(data_path))

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])

    schema_gen = SchemaGen(
        statistics=statistics_gen.outputs["statistics"],
        infer_feature_shape=True)

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"])

    trainer = Trainer(
        # Use RuntimeParameter as module_file to test out RuntimeParameter in
        # compiler.
        module_file=data_types.RuntimeParameter(
            name="module_file",
            default=os.path.join(iris_root, "iris_utils.py"),
            ptype=str),
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(num_steps=2000),
        eval_args=trainer_pb2.EvalArgs(num_steps=5))

    model_resolver = ResolverNode(
        instance_name="latest_blessed_model_resolver",
        resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name="eval")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    "sparse_categorical_accuracy":
                        tfma.config.MetricThreshold(
                            value_threshold=tfma.GenericValueThreshold(
                                lower_bound={"value": 0.6}),
                            change_threshold=tfma.GenericChangeThreshold(
                                direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                                absolute={"value": -1e-10}))
                })
        ])
    evaluator = Evaluator(
        examples=example_gen.outputs["examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config)

    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=serving_model_dir)))

    self._pipeline = pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            example_gen,
            statistics_gen,
            schema_gen,
            example_validator,
            trainer,
            model_resolver,
            evaluator,
            pusher,
        ],
        enable_cache=True,
        beam_pipeline_args=[])
コード例 #2
0
ファイル: container_entrypoint.py プロジェクト: sycdesign/tfx
def main():
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
    # the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_name', type=str, required=True)
    parser.add_argument('--pipeline_root', type=str, required=True)
    parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
    parser.add_argument('--beam_pipeline_args', type=str, required=True)
    parser.add_argument('--additional_pipeline_args', type=str, required=True)
    parser.add_argument('--component_launcher_class_path',
                        type=str,
                        required=True)
    parser.add_argument('--enable_cache', action='store_true')
    parser.add_argument('--serialized_component', type=str, required=True)
    parser.add_argument('--component_config', type=str, required=True)

    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)
    component_launcher_class = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(component_launcher_class,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % component_launcher_class)

    kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        _get_metadata_connection_config(kubeflow_metadata_config))
    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)

    beam_pipeline_args = json.loads(args.beam_pipeline_args)

    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    launcher = component_launcher_class.create(
        component=component,
        pipeline_info=data_types.PipelineInfo(
            pipeline_name=args.pipeline_name,
            pipeline_root=args.pipeline_root,
            run_id=os.environ['WORKFLOW_ID']),
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    # Attach necessary labels to distinguish different runner and DSL.
    # TODO(zhitaoli): Pass this from KFP runner side when the same container
    # entrypoint can be used by a different runner.
    with telemetry_utils.scoped_labels({
            telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
    }):
        execution_info = launcher.launch()

    # Dump the UI metadata.
    _dump_ui_metadata(component, execution_info)
コード例 #3
0
ファイル: pipeline.py プロジェクト: sycdesign/tfx
  def __init__(self,
               pipeline_name: Text,
               pipeline_root: Text,
               metadata_connection_config: Optional[
                   metadata.ConnectionConfigType] = None,
               components: Optional[List[base_node.BaseNode]] = None,
               enable_cache: Optional[bool] = False,
               beam_pipeline_args: Optional[List[Text]] = None,
               platform_config: Optional[message.Message] = None,
               execution_mode: Optional[ExecutionMode] = ExecutionMode.SYNC,
               **kwargs):
    """Initialize pipeline.

    Args:
      pipeline_name: Name of the pipeline;
      pipeline_root: Path to root directory of the pipeline;
      metadata_connection_config: The config to connect to ML metadata.
      components: A list of components in the pipeline (optional only for
        backward compatible purpose to be used with deprecated
        PipelineDecorator).
      enable_cache: Whether or not cache is enabled for this run.
      beam_pipeline_args: Pipeline arguments for Beam powered Components.
      platform_config: Pipeline level platform config, in proto form.
      execution_mode: The execution mode of the pipeline, can be SYNC or ASYNC.
      **kwargs: Additional kwargs forwarded as pipeline args.
    """
    if len(pipeline_name) > MAX_PIPELINE_NAME_LENGTH:
      raise ValueError('pipeline name %s exceeds maximum allowed lenght' %
                       pipeline_name)
    pipeline_args = dict(kwargs)

    self.pipeline_info = data_types.PipelineInfo(
        pipeline_name=pipeline_name, pipeline_root=pipeline_root)
    self.enable_cache = enable_cache
    self.metadata_connection_config = metadata_connection_config
    self.execution_mode = execution_mode

    self.beam_pipeline_args = beam_pipeline_args or []

    self.platform_config = platform_config

    self.additional_pipeline_args = pipeline_args.get(
        'additional_pipeline_args', {})

    # Store pipeline_args in a json file only when temp file exists.
    pipeline_args.update({
        'pipeline_name': pipeline_name,
        'pipeline_root': pipeline_root,
    })
    if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
      pipeline_args_path = os.environ.get('TFX_JSON_EXPORT_PIPELINE_ARGS_PATH')
      with open(pipeline_args_path, 'w') as f:
        json.dump(pipeline_args, f)

    # Calls property setter.
    self.components = components or []

    # TODO(b/156000550): Currently `beam_pipeline_args` is set at pipeline
    # level in the SDK. However it is subject to change, to move to per-node
    # configuration. We will need to change the following logic accordingly.
    if not self.beam_pipeline_args:
      return
    for component in components:
      if isinstance(component.executor_spec, executor_spec.ExecutorClassSpec):
        cast(executor_spec.ExecutorClassSpec,
             component.executor_spec).extra_flags.extend(beam_pipeline_args)