def two_step_pipeline_with_task_only_dependency() -> tfx_pipeline.Pipeline:
  """Builds a two-node pipeline whose nodes are linked only by task ordering.

  Neither container component declares inputs, outputs or parameters, so the
  only relation between the two nodes is the explicit upstream edge that makes
  'Step 2' depend on 'Step 1'.

  Returns:
    A pipeline named 'two-step-task-only-dependency-pipeline', rooted at
    `_TEST_PIPELINE_ROOT`, that contains both steps.
  """
  first_step_factory = container_component.create_container_component(
      name='Step 1',
      inputs={},
      outputs={},
      parameters={},
      image='step-1-image',
      command=['run', 'step-1'],
  )
  upstream_step = first_step_factory()

  second_step_factory = container_component.create_container_component(
      name='Step 2',
      inputs={},
      outputs={},
      parameters={},
      image='step-2-image',
      command=['run', 'step-2'],
  )
  downstream_step = second_step_factory()

  # No artifact is exchanged; the ordering is declared explicitly instead.
  downstream_step.add_upstream_node(upstream_step)

  pipeline_nodes = [upstream_step, downstream_step]
  return tfx_pipeline.Pipeline(
      pipeline_name='two-step-task-only-dependency-pipeline',
      pipeline_root=_TEST_PIPELINE_ROOT,
      components=pipeline_nodes,
  )
def testCreateComponent(self):
  """Compares a component loaded from KFP YAML with a hand-built reference.

  The loaded and the reference components must have the same type and the
  same `EXECUTOR_SPEC`.
  """
  yaml_path = os.path.join(self._testdata_path,
                           'kfp_container_component_test.yaml')
  loaded = kfp_container_component.load_kfp_yaml_container_component(
      yaml_path)

  # Command line the reference component is built with.
  expected_command = [
      'command1',
      'command2',
      'command3',
      placeholders.InputUriPlaceholder('Directory'),
      placeholders.InputValuePlaceholder('Subpath'),
      placeholders.OutputUriPlaceholder('File'),
      '--arg1',
      placeholders.InputUriPlaceholder('input1'),
      '--arg2',
      placeholders.InputValuePlaceholder('input2'),
      '--arg3',
      placeholders.OutputUriPlaceholder('output1'),
  ]
  expected_inputs = {
      'input1': standard_artifacts.String,
      'input2': standard_artifacts.String,
  }
  expected_outputs = {
      'output1': standard_artifacts.String,
  }
  reference = container_component.create_container_component(
      name='Test_Kfp_Container_Component',
      image='image1',
      command=expected_command,
      inputs=expected_inputs,
      outputs=expected_outputs,
      parameters={},
  )

  self.assertEqual(type(loaded), type(reference))
  self.assertEqual(reference.EXECUTOR_SPEC, loaded.EXECUTOR_SPEC)
def tasks_for_pipeline_with_artifact_value_passing():
  """Builds the two tasks of a pipeline where an artifact is read as a value.

  The 'Produce' component takes a `message` parameter and declares a `data`
  output artifact; the 'Print' component receives that artifact as its `text`
  input and references it through an input *value* placeholder.

  Returns:
    A list `[producer_task, print_task]`.
  """
  cloud_sdk_image = 'gcr.io/ml-pipeline/mirrors/cloud-sdk'

  # NOTE(review): the script below is reproduced exactly as it appears in this
  # view, i.e. on a single line. If the file really stores it that way, the
  # first `#` turns the rest of the script into a shell comment - confirm
  # against the original file before relying on it.
  produce_script = """ message="$0" output_data_uri="$1" output_data_path=$(mktemp) # Running the main code echo "Hello $message" >"$output_data_path" # Getting data out of the container gsutil cp -r "$output_data_path" "$output_data_uri" """

  produce_factory = container_component.create_container_component(
      name='Produce',
      outputs={
          'data': simple_artifacts.File,
      },
      parameters={
          'message': str,
      },
      image=cloud_sdk_image,
      command=[
          'sh',
          '-exc',
          produce_script,
          # Passed to the script as "$0" and "$1" respectively.
          placeholders.InputValuePlaceholder('message'),
          placeholders.OutputUriPlaceholder('data'),
      ],
  )

  print_factory = container_component.create_container_component(
      name='Print',
      inputs={
          'text': simple_artifacts.File,
      },
      image=cloud_sdk_image,
      command=[
          'echo',
          placeholders.InputValuePlaceholder('text'),
      ],
  )

  produce_task = produce_factory(message='World!')
  echo_task = print_factory(text=produce_task.outputs['data'])

  return [produce_task, echo_task]
def load_kfp_yaml_container_component(
    path: Text) -> Callable[..., base_component.BaseComponent]:
  """Creates a container-based component from a Kubeflow component spec.

  See
  https://www.kubeflow.org/docs/pipelines/reference/component-spec/

  Example:
    component = load_kfp_yaml_container_component(
      "kfp_pipelines_root/components/datasets/Chicago_Taxi_Trips/component.yaml"
    )

  The container's `command` entries followed by its `args` entries form the
  component command line. Every declared input and output is currently typed
  as `standard_artifacts.String`, and the component declares no parameters.

  Args:
    path: local file path of a Kubeflow Pipelines component YAML file. The
      file is read as UTF-8.

  Returns:
    Container component that can be instantiated in a TFX pipeline.
  """
  # Decode explicitly as UTF-8 so that loading a spec does not depend on the
  # platform's default text encoding.
  with open(path, encoding='utf-8') as component_file:
    # NOTE(review): FullLoader accepts more than plain data. Only load
    # component specs from trusted sources, or consider yaml.safe_load if the
    # spec format never needs Python-specific tags.
    data = yaml.load(component_file, Loader=yaml.FullLoader)

  # Helper defined elsewhere in this module; its return value is not used, so
  # it presumably rewrites `data` in place before proto parsing.
  _convert_target_fields_to_kv_pair(data)
  component_spec = json_format.ParseDict(
      data, kfp_component_spec_pb2.ComponentSpec())
  container = component_spec.implementation.container

  # `command` entries come first, then `args`, each converted in order.
  command = [
      _get_command_line_argument_type(argument)
      for argument in (*container.command, *container.args)
  ]

  # TODO(ericlege): Support classname to class translation in inputs.type
  inputs = {
      item.name: standard_artifacts.String for item in component_spec.inputs
  }
  outputs = {
      item.name: standard_artifacts.String for item in component_spec.outputs
  }

  return container_component.create_container_component(
      name=component_spec.name,
      image=container.image,
      command=command,
      inputs=inputs,
      outputs=outputs,
      parameters={},
  )
self._id = '{}.{}'.format(self.__class__.__name__, instance_name) else: self._id = self.__class__.__name__ dummy_transformer_component = container_component.create_container_component( name='DummyContainerSpecComponent', inputs={ 'input1': standard_artifacts.Model, }, outputs={ 'output1': standard_artifacts.Model, }, parameters={ 'param1': str, }, image='dummy/transformer', command=[ 'transformer', '--input1', placeholders.InputUriPlaceholder('input1'), '--output1', placeholders.OutputUriPlaceholder('output1'), '--param1', placeholders.InputValuePlaceholder('param1'), ], ) dummy_producer_component = container_component.create_container_component( name='DummyProducerComponent', outputs={ 'output1': standard_artifacts.Model,
def _wrap_container_component(
    self,
    component: base_node.BaseNode,
    component_launcher_class: Type[
        base_component_launcher.BaseComponentLauncher],
    component_config: Optional[base_component_config.BaseComponentConfig],
    pipeline: tfx_pipeline.Pipeline,
) -> base_node.BaseNode:
  """Wraps a component in a container component that launches it.

  All information needed to run the wrapped component (pipeline info,
  metadata config, launcher class path, the serialized component and its
  config) is passed to the container as command-line flags.

  Args:
    component: Component to be executed.
    component_launcher_class: The class of the launcher to launch the
      component.
    component_config: component config to launch the component.
    pipeline: Logical pipeline that contains pipeline related information.

  Returns:
    A container component that runs the wrapped component upon execution.
    Its id is the wrapped component's id followed by `_WRAPPER_SUFFIX`.
  """
  launcher_path = '.'.join((
      component_launcher_class.__module__,
      component_launcher_class.__name__,
  ))
  wrapped_component_json = json_utils.dumps(
      node_wrapper.NodeWrapper(component))

  # (flag, value) pairs, flattened below into one argument list.
  flag_value_pairs = (
      ('--pipeline_name', pipeline.pipeline_info.pipeline_name),
      ('--pipeline_root', pipeline.pipeline_info.pipeline_root),
      ('--run_id', pipeline.pipeline_info.run_id),
      ('--metadata_config',
       json_format.MessageToJson(
           message=get_default_kubernetes_metadata_config(),
           preserving_proto_field_name=True)),
      ('--beam_pipeline_args', json.dumps(pipeline.beam_pipeline_args)),
      ('--additional_pipeline_args',
       json.dumps(pipeline.additional_pipeline_args)),
      ('--component_launcher_class_path', launcher_path),
      ('--serialized_component', wrapped_component_json),
      ('--component_config', json_utils.dumps(component_config)),
  )
  arguments = [token for pair in flag_value_pairs for token in pair]

  # Outputs/Parameters fields are not used as they are contained in
  # the serialized component.
  wrapper_factory = container_component.create_container_component(
      name=component.__class__.__name__,
      outputs={},
      parameters={},
      image=self._config.tfx_image,
      command=_CONTAINER_COMMAND + arguments,
  )
  wrapper_id = component.id + _WRAPPER_SUFFIX
  return wrapper_factory().with_id(wrapper_id)
from tfx.components import CsvExampleGen from tfx.utils.dsl_utils import external_input from tfx.dsl.component.experimental import container_component from tfx.dsl.component.experimental import placeholders from tfx.types import standard_artifacts data_path = Path(__file__).parent / "data" / "data.csv" # TODO コンテナの中に該当するスクリプトがないのでエラーになる download_data_component = container_component.create_container_component( name='DownloadData', outputs={ 'data_uri': standard_artifacts.ExternalArtifact }, image='google/cloud-sdk:278.0.0', command=[ "sh", "-exc", """ data_uri="$0" python container_component/download_data.py --uri "$data_uri" """, placeholders.OutputUriPlaceholder("data_uri") ] ) xgb_component = container_component.create_container_component( name='XGBTrainer', inputs={ 'data': standard_artifacts.Examples }, # outputs={ # 'model': standard_artifacts.ExternalArtifact
# Container component 'DownloadFromHttp': takes a `url` string parameter and
# declares a single `data` output artifact. The shell script receives the URL
# as "$0" and the output artifact URI as "$1", fetches the URL with wget
# (falling back to curl) into a temporary file, and copies that file to
# "<output uri>/data" with gsutil.
#
# NOTE(review): the script literal is shown here on a single line. If the file
# really stores it that way, the first `#` inside it turns the rest of the
# script into a shell comment - confirm against the original file.
downloader_component = container_component.create_container_component(
    name='DownloadFromHttp',
    outputs={
        'data': standard_artifacts.ExternalArtifact,
    },
    parameters={
        'url': str,
    },
    # The component code uses gsutil to upload the data to GCS, so the
    # container image needs to have gsutil installed and configured.
    # Fixing b/150670779 by merging cl/294536017 will lift this limitation.
    image='google/cloud-sdk:278.0.0',
    command=[
        'sh',
        '-exc',
        ''' url="$0" output_data_uri="$1"/data # TODO(b/150515270) Remove when fixed. output_data_path=$(mktemp) # Running the main code wget "$0" -O "$output_data_path" || curl "$0" > "$output_data_path" # Getting data out of the container gsutil cp "$output_data_path" "$output_data_uri" ''',
        # Positional arguments for the script: "$0" is the `url` execution
        # property, "$1" is the URI of the first `data` output artifact.
        ph.exec_property('url'),
        ph.output('data')[0].uri,
    ],
)