Example 1
def _compile_pipeline_function(
    pipeline_funcs: List[Callable],
    function_name: Optional[str],
    pipeline_parameters: Optional[Mapping[str, Any]],
    package_path: str,
    type_check: bool,
    use_experimental: bool,
) -> None:
    """Compiles a pipeline function.

    Args:
      pipeline_funcs: A list of pipeline_functions.
      function_name: The name of the pipeline function to compile when there
        are multiple.
      pipeline_parameters: The pipeline parameters as a dict of {name: value}.
      package_path: The output path of the compiled result.
      type_check: Whether to enable the type checking.
      use_experimental: Whether to use the experimental compiler.
    """
    if len(pipeline_funcs) == 0:
        raise ValueError(
            'A function with @dsl.pipeline decorator is required in the py file.'
        )

    if len(pipeline_funcs) > 1 and not function_name:
        func_names = [x.__name__ for x in pipeline_funcs]
        raise ValueError(
            'There are multiple pipelines: %s. Please specify --function.' %
            func_names)

    if function_name:
        pipeline_func = next(
            (x for x in pipeline_funcs if x.__name__ == function_name), None)
        if not pipeline_func:
            raise ValueError('The function "%s" does not exist. '
                             'Did you forget @dsl.pipeline decoration?' %
                             function_name)
    else:
        pipeline_func = pipeline_funcs[0]

    if use_experimental:
        experimental_compiler.Compiler().compile(
            pipeline_func=pipeline_func,
            pipeline_parameters=pipeline_parameters,
            package_path=package_path,
            type_check=type_check)
    else:
        compiler.Compiler().compile(pipeline_func=pipeline_func,
                                    pipeline_parameters=pipeline_parameters,
                                    package_path=package_path,
                                    type_check=type_check)
Example 2
    def test_compile_simple_pipeline(self):
        tmpdir = tempfile.mkdtemp()
        try:
            producer_op = components.load_component_from_text("""
      name: producer
      inputs:
      - {name: input_param, type: String}
      outputs:
      - {name: output_model, type: Model}
      - {name: output_value, type: Integer}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: input_param}
          - {outputPath: output_model}
          - {outputPath: output_value}
      """)

            consumer_op = components.load_component_from_text("""
      name: consumer
      inputs:
      - {name: input_model, type: Model}
      - {name: input_value, type: Integer}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputPath: input_model}
          - {inputValue: input_value}
      """)

            @dsl.pipeline(name='test-pipeline')
            def simple_pipeline(pipeline_input: str = 'Hello KFP!'):
                producer = producer_op(input_param=pipeline_input)
                consumer = consumer_op(
                    input_model=producer.outputs['output_model'],
                    input_value=producer.outputs['output_value'])

            target_json_file = os.path.join(tmpdir, 'result.json')
            compiler.Compiler().compile(pipeline_func=simple_pipeline,
                                        package_path=target_json_file)

            self.assertTrue(os.path.exists(target_json_file))
            with open(target_json_file, 'r') as f:
                print(f.read())
        finally:
            shutil.rmtree(tmpdir)
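A hedged follow-up: the assertion above only checks that the output file exists. A stronger check could parse the JSON before the tmpdir is removed; the key names below assume the kfp.v2 PipelineJob layout (pipelineSpec / runtimeConfig), and the snippet would sit inside the try block, indentation adjusted:

import json

with open(target_json_file) as f:
    job = json.load(f)
self.assertIn('pipelineSpec', job)
self.assertEqual(job['pipelineSpec']['pipelineInfo']['name'], 'test-pipeline')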
Example 3
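# NOTE: this excerpt starts mid-function; `num_steps` and `line` are defined
# in the omitted portion above, and `model` is presumably an Output[Model]
# parameter of the component function.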
    with open(model.path, 'w') as output_file:
        for i in range(num_steps):
            output_file.write('Step {}\n{}\n=====\n'.format(i, line))

    # model is an instance of Model artifact, which has a .metadata dictionary
    # to store arbitrary metadata for the output artifact.
    model.metadata['accuracy'] = 0.9


@dsl.pipeline(pipeline_root='dummy_root', name='my-test-pipeline-beta')
def pipeline(message: str, input_dict: Dict[str, int] = {'A': 1, 'B': 2}):

    preprocess_task = preprocess(
        message=message,
        input_dict_parameter=input_dict,
        input_list_parameter=['a', 'b', 'c'],
    )
    train_task = train(
        dataset_one_path=preprocess_task.outputs['output_dataset_one'],
        dataset_two=preprocess_task.outputs['output_dataset_two_path'],
        message=preprocess_task.outputs['output_parameter_path'],
        input_bool=preprocess_task.outputs['output_bool_parameter_path'],
        input_dict=preprocess_task.outputs['output_dict_parameter_path'],
        input_list=preprocess_task.outputs['output_list_parameter_path'],
    )


if __name__ == '__main__':
    compiler.Compiler().compile(pipeline_func=pipeline,
                                package_path=__file__.replace('.py', '.json'))
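A minimal sketch (hypothetical component, assuming the kfp.v2.dsl lightweight-component API) of how a downstream step could read the metadata written on the Model artifact above:

from kfp.v2.dsl import Input, Model, component


@component
def evaluate(model: Input[Model]):
    # Metadata written by the producer (model.metadata['accuracy'] = 0.9)
    # is available on the consuming side as a plain dict.
    print('reported accuracy:', model.metadata.get('accuracy'))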
Example 4
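This example references component_op_1 without defining it. A hypothetical producer consistent with the consumer below (it writes `text` to a GCS path and exposes it as output_gcs_path) might look like:

component_op_1 = components.load_component_from_text("""
name: Write to GCS
inputs:
- {name: text, type: String, description: 'Content to write'}
outputs:
- {name: output_gcs_path, type: GCSPath, description: 'GCS file path'}
implementation:
  container:
    image: google/cloud-sdk:slim
    command:
    - sh
    - -c
    - |
      set -e -x
      echo "$0" | gsutil cp - "$1"
    - {inputValue: text}
    - {outputUri: output_gcs_path}
""")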
component_op_2 = components.load_component_from_text("""
name: Read from GCS
inputs:
- {name: input_gcs_path, type: GCSPath, description: 'GCS file path'}
implementation:
  container:
    image: google/cloud-sdk:slim
    command:
    - sh
    - -c
    - |
      set -e -x
      gsutil cat "$0"
    - {inputUri: input_gcs_path}
""")


@dsl.pipeline(name='simple-two-step-pipeline', pipeline_root='dummy_root')
def my_pipeline(text: str = 'Hello world!'):
    component_1 = component_op_1(text=text).set_display_name('Producer')
    component_2 = component_op_2(
        input_gcs_path=component_1.outputs['output_gcs_path'])
    component_2.set_display_name('Consumer')


if __name__ == '__main__':
    compiler.Compiler().compile(pipeline_func=my_pipeline,
                                pipeline_parameters={'text': 'Hello KFP!'},
                                package_path=__file__.replace('.py', '.json'))
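A hedged sanity check (key names assume the kfp.v2 PipelineJob JSON layout): the compile-time pipeline_parameters override should land in the job's runtimeConfig, taking precedence over the function default 'Hello world!'. It could be appended inside the __main__ block above:

    import json

    with open(__file__.replace('.py', '.json')) as f:
        job = json.load(f)
    print(job['runtimeConfig'].get('parameters'))  # expect the 'text' override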