Ejemplo n.º 1
0
    def setUp(self):
        """Build the fixtures used by the tests in this case.

        Stores three attributes on the test instance:
          * ``_output_dict``: a single output named ``output_name`` holding
            one ``standard_artifacts.Examples`` instance.
          * ``_pipeline_properties``: pipeline properties with placeholder
            output and log locations.
          * ``component``: a ``base_component.BaseComponent`` constructed
            inside a ``dsl.Pipeline`` context named ``test_pipeline``.
        """
        examples_artifact = standard_artifacts.Examples()
        self._output_dict = {'output_name': [examples_artifact]}
        self._pipeline_properties = base_component.PipelineProperties(
            output_dir='output_dir', log_root='log_root')

        # Ordered mappings keep the insertion order of the entries stable
        # regardless of the interpreter's plain-dict ordering.
        component_inputs = collections.OrderedDict()
        component_inputs['input_data'] = 'input-data-contents'
        component_inputs['train_steps'] = 300
        component_inputs['accuracy_threshold'] = 0.3

        executor_properties = collections.OrderedDict()
        executor_properties['module_file'] = '/path/to/module.py'

        # NOTE(review): the component is presumably required to be created
        # while a dsl.Pipeline context is active -- confirm against the
        # Kubeflow Pipelines SDK version in use.
        with dsl.Pipeline('test_pipeline'):
            self.component = base_component.BaseComponent(
                component_name='TFXComponent',
                input_dict=component_inputs,
                output_dict=self._output_dict,
                exec_properties=executor_properties,
                executor_class_path='some.executor.Class',
                pipeline_properties=self._pipeline_properties,
            )
Ejemplo n.º 2
0
    def _construct_pipeline_graph(self, pipeline):
        """Constructs a Kubeflow Pipeline graph.

        Walks ``pipeline.components`` in order and wraps each one in a
        ``base_component.BaseComponent``. An input whose channel was produced
        by an earlier component is wired to that component's output; any
        other input is passed as a JSON string of its artifacts'
        ``json_dict()`` values.

        Args:
          pipeline: The logical TFX pipeline to base the construction on.
            Its ``pipeline_args`` must provide ``pipeline_root``,
            ``pipeline_name`` and ``log_root``; an optional
            ``additional_pipeline_args`` mapping may supply
            ``beam_pipeline_args`` and ``tfx_image``.

        Raises:
          ValueError: If an upstream component output used as an input is
            not a ``dsl.PipelineParam``.
        """
        root_and_name = (pipeline.pipeline_args['pipeline_root'],
                         pipeline.pipeline_args['pipeline_name'])
        output_dir = os.path.join(*root_and_name)

        # Optional settings fall back to "no Beam args" / "no custom image".
        if 'additional_pipeline_args' in pipeline.pipeline_args:
            extra_args = pipeline.pipeline_args['additional_pipeline_args']
            beam_pipeline_args = extra_args.get('beam_pipeline_args', [])
            tfx_image = extra_args.get('tfx_image')
        else:
            beam_pipeline_args = []
            tfx_image = None

        pipeline_properties = base_component.PipelineProperties(
            output_dir=output_dir,
            log_root=pipeline.pipeline_args['log_root'],
            beam_pipeline_args=beam_pipeline_args,
            tfx_image=tfx_image,
        )

        # Maps each output Channel to the Kubeflow component that produces it
        # and the name under which that component exposes it.
        # Assumption: Channels are unique in a pipeline.
        producers = {}

        # Assumption: components are listed so that every component appears
        # after all of the components it depends on; otherwise an upstream
        # channel would not yet be registered in `producers` when consumed.
        for component in pipeline.components:
            resolved_inputs = {}
            for input_name, input_channel in component.input_dict.items():
                if input_channel not in producers:
                    # Not produced inside this pipeline: pass the artifacts
                    # along as a serialized JSON list.
                    resolved_inputs[input_name] = json.dumps(
                        [artifact.json_dict()
                         for artifact in input_channel.get()])
                    continue

                upstream = producers[input_channel]
                upstream_output = getattr(upstream['component'].outputs,
                                          upstream['channel_name'])
                if not isinstance(upstream_output, dsl.PipelineParam):
                    raise ValueError(
                        'Component outputs should be of type dsl.PipelineParam.'
                        ' Got type {} for output {}'.format(
                            type(upstream_output), upstream_output))
                resolved_inputs[input_name] = upstream_output

            executor = component.executor
            executor_class_path = '.'.join(
                (executor.__module__, executor.__name__))

            kfp_component = base_component.BaseComponent(
                component_name=component.component_name,
                input_dict=resolved_inputs,
                output_dict=self._prepare_output_dict(component.outputs),
                exec_properties=component.exec_properties,
                executor_class_path=executor_class_path,
                pipeline_properties=pipeline_properties)

            # Register this component as the producer of each of its outputs
            # so that later components can be wired to them.
            for channel_name, channel in component.outputs.get_all().items():
                producers[channel] = {
                    'component': kfp_component,
                    'channel_name': channel_name,
                }