예제 #1
0
    def test_annotations_stripping(self):
        """Build and locally run a component whose signature uses annotations
        that only exist in this test's local scope.

        ``my_func`` is annotated with a locally defined class, a dotted
        ``collections.OrderedDict`` reference and a locally created
        ``NamedTuple`` return type. The component is created with
        ``comp.create_component_from_func`` and executed through
        ``helper_test_component_using_local_call``, which is expected to
        report the outputs ``sum == '8'`` and ``product == '15'``.
        """
        import collections
        import typing

        MyFuncOutputs = typing.NamedTuple('Outputs', [('sum', int),
                                                      ('product', int)])

        class CustomType1:
            pass

        # None is a singleton, so the defaults are checked with `is None`
        # rather than `== None` (which would go through __eq__).
        def my_func(
            param1: CustomType1 = None,  # This caused failure previously
            param2: collections.
            OrderedDict = None,  # This caused failure previously
        ) -> MyFuncOutputs:  # This caused failure previously
            assert param1 is None
            assert param2 is None
            return (8, 15)

        task_factory = comp.create_component_from_func(my_func)

        self.helper_test_component_using_local_call(task_factory,
                                                    arguments={},
                                                    expected_output_values={
                                                        'sum': '8',
                                                        'product': '15'
                                                    })
예제 #2
0
    def test_handling_list_arguments_containing_serializable_python_objects(
            self):
        """Objects exposing ``.to_struct()`` that sit inside list or dict
        arguments must reach the component as the structure they return."""

        class MyClass:

            def to_struct(self):
                return {'foo': [7, 42]}

        def assert_values_are_correct(
            list_param: list,
            dict_param: dict,
        ) -> int:
            import unittest
            expected_struct = {'foo': [7, 42]}
            unittest.TestCase().assertEqual(list_param,
                                            [1, expected_struct, 3])
            unittest.TestCase().assertEqual(dict_param,
                                            {'k1': expected_struct})
            return 1

        op_factory = comp.create_component_from_func(
            assert_values_are_correct)

        # Each argument carries one MyClass instance to be serialized.
        component_arguments = {
            'list_param': [1, MyClass(), 3],
            'dict_param': {
                'k1': MyClass()
            },
        }
        self.helper_test_component_using_local_call(
            op_factory,
            arguments=component_arguments,
            expected_output_values={'Output': '1'},
        )
예제 #3
0
    def test_code_with_escapes(self):
        """Build a component from a function whose docstring contains a
        backslash escape and run it through the local-call helper with no
        arguments and no expected outputs."""

        # The test name suggests the escape inside my_func's docstring is what
        # is being exercised; keep that literal exactly as written.
        def my_func():
            """Hello \n world."""

        task_factory = comp.create_component_from_func(my_func)
        self.helper_test_component_using_local_call(task_factory,
                                                    arguments={},
                                                    expected_output_values={})
예제 #4
0
    def test_argument_serialization_success(self):
        """A parameter annotated ``typing.List[int]`` accepts a list of ints
        when the component is run through the local-call helper."""
        from typing import List

        def my_func(args: List[int]):
            print(args)

        call_arguments = {'args': [1, 2, 3]}
        op_factory = comp.create_component_from_func(my_func)
        self.helper_test_component_using_local_call(op_factory,
                                                    arguments=call_arguments)
예제 #5
0
    def test_fail_on_handling_list_arguments_containing_python_objects(self):
        """Passing a list or dict that holds an object without ``.to_struct()``
        to a component factory must raise."""

        class MyClass:
            pass

        def consume_list(list_param: list, ) -> int:
            return 1

        def consume_dict(dict_param: dict, ) -> int:
            return 1

        list_op = comp.create_component_from_func(consume_list)
        dict_op = comp.create_component_from_func(consume_dict)

        # (factory, argument) pairs; every call is expected to fail.
        failing_calls = (
            (list_op, [1, MyClass(), 3]),
            (dict_op, {'k1': MyClass()}),
        )
        for op_factory, bad_argument in failing_calls:
            with self.assertRaises(Exception):
                op_factory(bad_argument)
예제 #6
0
def artifact_passing_pipeline():
    # Chain producer -> processor -> consumer, forwarding both outputs of each
    # step to the next one.
    produced = producer_op().outputs
    processed = processor_op(produced['output_1'],
                             produced['output_2']).outputs
    consumer_op(processed['output_1'], processed['output_2'])

    # Independent step built on the fly from metadata_and_metrics.
    create_component_from_func(func=metadata_and_metrics)()

    # This line is only needed for compiling using dsl-compile to work
    pipeline_conf = kfp.dsl.get_pipeline_conf()
    pipeline_conf.data_passing_method = volume_based_data_passing_method
예제 #7
0
    def test_component_annotations(self):
        """Annotations handed to ``create_component_from_func`` must appear
        unchanged in the resulting component spec metadata."""

        def some_func():
            pass

        expected_annotations = dict(key1='value1', key2='value2')
        op_factory = comp.create_component_from_func(
            some_func, annotations=expected_annotations)

        self.assertEqual(op_factory.component_spec.metadata.annotations,
                         expected_annotations)
예제 #8
0
    def test_argument_serialization_failure(self):
        """A parameter annotated ``typing.Sequence[int]`` must make the local
        call fail with a ``TypeError`` naming the missing serializer."""
        from typing import Sequence

        def my_func(args: Sequence[int]):
            print(args)

        task_factory = comp.create_component_from_func(my_func)
        # Raw string: the pattern contains regex escapes (\[ and \]) that are
        # invalid escape sequences in a normal string literal. The pattern
        # text itself is unchanged.
        with self.assertRaisesRegex(
                TypeError,
                r'There are no registered serializers for type "(typing.)?Sequence(\[int\])?"'
        ):
            self.helper_test_component_using_local_call(
                task_factory, arguments={'args': [1, 2, 3]})
예제 #9
0
    def test_handling_list_arguments_containing_pipelineparam(self):
        """When a list argument contains a ``PipelineParam``, no piece of the
        resulting command line may contain the text ``PipelineParam``."""
        import kfp.deprecated as kfp

        def consume_list(list_param: list) -> int:
            pass

        list_with_param = [1, 2, 3, kfp.dsl.PipelineParam('aaa'), 4, 5, 6]
        task = create_component_from_func(consume_list)(list_with_param)

        for cli_piece in task.command + task.arguments:
            self.assertNotIn('PipelineParam', cli_piece)
예제 #10
0
def get_run_info(run_id: str):
    """Example of getting run info for current pipeline run."""
    print(f'Current run ID is {run_id}.')
    import kfp

    # KFP API server is usually available as ml-pipeline service in the same
    # namespace, but for full Kubeflow deployment, you need to edit this to
    # http://ml-pipeline.kubeflow:8888, because your pipelines are running in
    # user namespaces, but the API is at kubeflow namespace.
    api_client = kfp.Client(host='http://ml-pipeline:8888')

    # Only the `.run` attribute of the response is printed, to hide verbose
    # info.
    print(api_client.get_run(run_id=run_id).run)


# Component built from get_run_info. 'kfp' is requested through
# packages_to_install; get_run_info imports it inside its body.
get_run_info_component = components.create_component_from_func(
    func=get_run_info,
    packages_to_install=['kfp'],
)


@dsl.pipeline(
    name='use-run-id',
    description=('A pipeline that demonstrates how to use run information, '
                 'including run ID etc.'),
)
def pipeline_use_run_id(run_id: str = kfp.dsl.RUN_ID_PLACEHOLDER):
    """Pass the run ID to the run-info component.

    The default value is kfp.dsl.RUN_ID_PLACEHOLDER, which will be populated
    with the KFP Run ID at runtime.
    """
    get_run_info_component(run_id=run_id)


if __name__ == '__main__':
예제 #11
0
"""Two step v2-compatible pipeline."""

from kfp.deprecated import components, dsl
from kfp.deprecated.components import InputPath, OutputPath


def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
               output_dataset_one: OutputPath('Dataset')):
    """Dummy Preprocess Step."""
    # Both outputs receive fixed payloads; `uri` and `some_int` are not read
    # by this dummy step.
    with open(output_dataset_one, 'w') as dataset_file:
        dataset_file.write('Output dataset')
    with open(output_parameter_one, 'w') as parameter_file:
        parameter_file.write(str(1234))


# Component built from preprocess, with 'python:3.9' passed as base_image.
preprocess_op = components.create_component_from_func(preprocess,
                                                      base_image='python:3.9')


@components.create_component_from_func
def train_op(dataset: InputPath('Dataset'),
             model: OutputPath('Model'),
             num_steps: int = 100):
    """Dummy Training Step."""

    # The "model" is the dataset text repeated once per step, each copy
    # prefixed with its step number and followed by a separator line.
    with open(dataset, 'r') as source:
        contents = source.read()
        with open(model, 'w') as sink:
            sink.writelines("Step {}\n{}\n=====\n".format(step, contents)
                            for step in range(num_steps))
# limitations under the License.

from kfp.deprecated import dsl, components, compiler
from typing import NamedTuple


def training_job():
    # Report whether torch sees a CUDA device and fail the step when it
    # does not.
    import torch
    gpu_available = torch.cuda.is_available()
    print(f'The gpus status is: {gpu_available}')
    if gpu_available:
        return
    raise ValueError('GPU not available')


# Component built from training_job. The base image is a PyTorch CUDA runtime
# image (per its tag); no additional packages are requested.
training_comp = components.create_component_from_func(
    training_job,
    base_image='pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime',
    packages_to_install=[])


@components.create_component_from_func
def generate_resource_constraints_request(
) -> NamedTuple('output', [('gpu_vendor', str), ('nbr_gpus', str),
                           ('constrain_type', str), ('constrain_value', str)]):
    """Returns the gpu resource and constraints settings"""
    from collections import namedtuple

    # Field names must match the declared return annotation; the second
    # field was previously spelled 'nbr_gpu' here while the annotation
    # declares 'nbr_gpus'.
    output = namedtuple(
        'output',
        ['gpu_vendor', 'nbr_gpus', 'constrain_type', 'constrain_value'])

    return output('nvidia.com/gpu', '1', 'cloud.google.com/gke-accelerator',
                  'nvidia-tesla-p4')
예제 #13
0
def flip_coin(force_flip_result: str = '') -> str:
    """Flip a coin and output heads or tails randomly."""
    # A non-empty forced result wins; otherwise 0 maps to heads, 1 to tails.
    if not force_flip_result:
        import random
        return 'tails' if random.randint(0, 1) else 'heads'
    return force_flip_result


def print_msg(msg: str):
    """Print a message."""
    # Writes `msg` to standard output via print(); nothing is returned.
    print(msg)


# Component factories built from flip_coin and print_msg; no base image or
# extra packages are specified for either.
flip_coin_op = components.create_component_from_func(flip_coin)

print_op = components.create_component_from_func(print_msg)


@dsl.pipeline(name='condition')
def condition(text: str = 'condition test', force_flip_result: str = ''):
    # First flip (optionally forced) is always printed.
    first_flip = flip_coin_op(force_flip_result)
    print_op(first_flip.output)

    # The nested steps are placed under a condition on the first flip's
    # output being 'heads'.
    heads_came_up = first_flip.output == 'heads'
    with dsl.Condition(heads_came_up):
        second_flip = flip_coin_op()
        print_op(second_flip.output)
        print_op(text)

예제 #14
0
파일: fail.py 프로젝트: rpatil524/pipelines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Fail pipeline."""

from kfp.deprecated import components, dsl


def fail():
    """Fails"""
    # Equivalent to sys.exit(1): terminate with exit status 1.
    raise SystemExit(1)


# Component built from fail, with 'alpine:latest' passed as base_image.
# NOTE(review): confirm whether this image is expected to provide a Python
# interpreter, or whether the step failing for any reason is sufficient here.
fail_op = components.create_component_from_func(fail,
                                                base_image='alpine:latest')


@dsl.pipeline(name='fail-pipeline')
def fail_pipeline():
    # Single-step pipeline: instantiate the failing component and nothing else.
    fail_op()
예제 #15
0
        ])

    model = create_model()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          histogram_freq=1)

    model.fit(x=x_train,
              y=y_train,
              epochs=5,
              validation_data=(x_test, y_test),
              callbacks=[tensorboard_callback])


# Be careful when choosing a tensorboard image:
# * tensorflow/tensorflow may fail with image pull backoff, because of dockerhub rate limiting.
# * tensorboard in tensorflow 2.3+ does not work with KFP, refer to https://github.com/kubeflow/pipelines/issues/5521.
#
# Component built from train, using the tf2-cpu 2-4 image below as base_image.
train_op = create_component_from_func(
    train, base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-4')


@dsl.pipeline(name='pipeline-tensorboard-gcs')
def my_pipeline(
    log_dir=f'gs://{{kfp-default-bucket}}/tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}'
):
    # Both steps receive the same log directory; the training step is ordered
    # after the tensorboard preparation step.
    tensorboard_prep = prepare_tensorboard(log_dir_uri=log_dir)
    train_op(log_dir=log_dir).after(tensorboard_prep)
예제 #16
0
                log_dir, os.path.relpath(start=log_dir_local, path=path)
            )
            client.fput_object(
                bucket_name=log_bucket,
                object_name=object_name,
                file_path=path,
            )
            count = count + 1
            print(f'{path} uploaded to minio://{log_bucket}/{object_name}')
    print(f'{count} log files uploaded to minio://{log_bucket}/{log_dir}')


# tensorflow/tensorflow:2.4 may fail with image pull backoff, because of dockerhub rate limiting.
#
# Component built from train, using the tf2-cpu 2-3 image as base_image and
# requesting the 'minio' package through packages_to_install.
train_op = create_component_from_func(
    train,
    base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest',
    packages_to_install=['minio'],  # TODO: pin minio version
)


@dsl.pipeline(name='pipeline-tensorboard-minio')
def my_pipeline(
    minio_endpoint: str = 'minio-service:9000',
    log_bucket: str = 'mlpipeline',
    log_dir: str = f'tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}',
    # Pin to tensorflow 2.3, because in 2.4+ tensorboard cannot load in KFP:
    # refer to https://github.com/kubeflow/pipelines/issues/5521.
    tf_image: str = 'gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest'
):
    # tensorboard uses s3 protocol to access minio
    prepare_tb_task = prepare_tensorboard(
예제 #17
0
''')


# Accessing GCS using the Google Cloud Python library
def gcs_list_buckets():
    # Request the bucket listing from a default-constructed storage client,
    # then print a header followed by one bucket name per line.
    from google.cloud import storage
    bucket_listing = storage.Client().list_buckets()
    print("List of buckets:")
    for gcs_bucket in bucket_listing:
        print(gcs_bucket.name)


# Component built from gcs_list_buckets on 'python:3.7'. The pinned
# google-cloud-storage package is requested because the function imports
# google.cloud.storage inside its body.
gcs_list_buckets_op = components.create_component_from_func(
    gcs_list_buckets,
    base_image='python:3.7',
    packages_to_install=['google-cloud-storage==1.31.2'],
)


@dsl.pipeline(
    name='secret-pipeline',
    description='A pipeline to demonstrate mounting and use of secretes.',
)
def secret_op_pipeline(
        url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
    """A pipeline that uses secret to access cloud hosted resources."""
    # Two independent steps: one receives `url`, the other takes no arguments
    # and lists buckets.
    gcs_list_items_op(url)
    gcs_list_buckets_op()